making initial code2 codebase diretory

This commit is contained in:
2025-09-14 16:05:56 -04:00
parent 8125f1680c
commit 34e9f590ff
27 changed files with 5226 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
VEFontCache Odin
Copyright 2024 Edward R. Gonzalez
This project is based on Vertex Engine GPU Font Cache
by Xi Chen (https://github.com/hypernewbie/VEFontCache). It has been substantially
rewritten and redesigned for the Odin programming language.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

289
code2/vefontcache/LRU.odin Normal file
View File

@@ -0,0 +1,289 @@
package vefontcache
/* Note(Ed):
Original implementation has been changed moderately.
Notably the LRU is now type generic for its key value.
This was done to profile between using u64, u32, and u16.
What ended up happening was using u32 for both the atlas and the shape cache
yielded a several ms save for processing thousands of draw text calls.
There was an attempt at an optimization pass but the directives done here (other than force_inline)
are marginal changes at best.
TODO(Ed): Odin's map rehashes integer values. Maybe bring in a custom KeyTable?
*/
// 16-bit hashing was attempted, however it seems to get collisions with djb8_hash_16
// All-bits-set sentinels, one per key width. lru_get_next_evicted returns ~Key_Type(0)
// (i.e. the mask matching the key's width) when the cache is not full and nothing would be evicted.
LRU_Fail_Mask_16 :: 0xFFFF
LRU_Fail_Mask_32 :: 0xFFFFFFFF
LRU_Fail_Mask_64 :: 0xFFFFFFFFFFFFFFFF
// Index of a node inside Pool_List.items; -1 is used throughout as "no node".
Pool_ListIter :: i32
// A node of the index-linked list stored in Pool_List.items.
Pool_List_Item :: struct( $V_Type : typeid ) #packed {
// Pool_List_Item :: struct( $V_Type : typeid ) {
// Index of the neighbour toward the front of the list, or -1 when this node is the front / unlinked.
prev : Pool_ListIter,
// Index of the neighbour toward the back of the list, or -1 when this node is the back / unlinked.
next : Pool_ListIter,
// Payload. LRU_Cache stores its keys here (see LRU_Cache.key_queue).
value : V_Type,
}
// Fixed-capacity doubly linked list whose nodes live in a flat array and link to each other by index.
Pool_List :: struct( $V_Type : typeid) {
// Node storage; pool_list_init sizes it to `capacity` entries.
items : [dynamic]Pool_List_Item(V_Type),
// Indices of currently unused nodes. push_front pops from the end, erase appends to the end.
free_list : [dynamic]Pool_ListIter,
// Index of the first (most recently pushed / moved) node, or -1 when the list is empty.
front : Pool_ListIter,
// Index of the last node, or -1 when the list is empty.
back : Pool_ListIter,
// Number of linked nodes. The procs assert len(free_list) == capacity - size.
size : i32,
// Maximum number of nodes.
capacity : i32,
// Optional label; only referenced by commented-out logging in this file.
dbg_name : string,
}
/* Allocates the backing arrays for `pool` and puts it into the empty state.

	pool     : list to initialize (any previous arrays are NOT freed here).
	capacity : maximum number of values the list can hold.
	dbg_name : optional label stored on the pool.

	After the call every slot index is on the free list, no node is linked,
	front/back are -1 and size is 0.
*/
pool_list_init :: proc( pool : ^Pool_List($V_Type), capacity : i32, dbg_name : string = "" )
{
	error : Allocator_Error
	pool.items, error = make( [dynamic]Pool_List_Item(V_Type), int(capacity) )
	assert( error == .None, "VEFontCache.pool_list_init: Failed to allocate items array")
	resize( & pool.items, capacity )

	pool.free_list, error = make( [dynamic]Pool_ListIter, len = 0, cap = int(capacity) )
	assert( error == .None, "VEFontCache.pool_list_init: Failed to allocate free_list array")
	resize( & pool.free_list, capacity )

	pool.capacity = capacity
	pool.dbg_name = dbg_name

	// Every slot starts out free and unlinked.
	for id in 0 ..< pool.capacity {
		pool.free_list[id] = Pool_ListIter(id)
		pool.items[id] = {
			prev = -1,
			next = -1,
		}
	}

	pool.front = -1
	pool.back  = -1
	// Keep the invariant len(free_list) == capacity - size even when re-initializing a used pool.
	pool.size  = 0
}
// Releases the pool's two backing arrays. The other fields (front, back, size, capacity) are left untouched.
pool_list_free :: proc( pool : ^Pool_List($V_Type) ) {
delete( pool.items)
delete( pool.free_list)
}
// Passes both backing arrays to reload_array together with `allocator`.
// NOTE(review): reload_array is defined outside this file; presumably it rebinds each array's
// allocator after a hot-reload - confirm against its definition.
pool_list_reload :: proc( pool : ^Pool_List($V_Type), allocator : Allocator ) {
reload_array( & pool.items, allocator )
reload_array( & pool.free_list, allocator )
}
/* Resets `pool` to the empty state without releasing its memory.

	Every slot index in [0, pool.capacity) is put back on the free list, all nodes are unlinked,
	front/back become -1 and size becomes 0.
*/
pool_list_clear :: proc( pool: ^Pool_List($V_Type) )
{
	clear(& pool.items)
	clear(& pool.free_list)

	// Size both arrays to the logical capacity (same as pool_list_init) rather than to whatever the
	// backing allocation happens to hold: push_front/erase assert len(free_list) == capacity - size,
	// and any entries beyond pool.capacity would never be initialized by the loop below.
	resize( & pool.items, pool.capacity )
	resize( & pool.free_list, pool.capacity )

	for id in 0 ..< pool.capacity {
		pool.free_list[id] = Pool_ListIter(id)
		pool.items[id] = {
			prev = -1,
			next = -1,
		}
	}

	pool.front = -1
	pool.back  = -1
	pool.size  = 0
}
// Links `value` in as the new front of the list, using a slot taken from the end of the free list.
// Does nothing when the pool is already at capacity.
@(optimization_mode="favor_size")
pool_list_push_front :: proc( pool : ^Pool_List($V_Type), value : V_Type ) #no_bounds_check
{
	if pool.size >= pool.capacity {
		return
	}

	free_count := len(pool.free_list)
	assert( free_count > 0 )
	assert( free_count == int(pool.capacity - pool.size) )

	// Claim the slot sitting at the end of the free list.
	slot := pool.free_list[ len(pool.free_list) - 1 ]
	pop( & pool.free_list )

	old_front := pool.front

	node := & pool.items[ slot ]
	node.prev  = -1
	node.next  = old_front
	node.value = value

	// Hook the previous front (if any) up behind the new node.
	if old_front != -1 {
		pool.items[ old_front ].prev = slot
	}
	// First element in the list is also its back.
	if pool.back == -1 {
		pool.back = slot
	}

	pool.front = slot
	pool.size += 1
}
/* Unlinks the node at index `iter` and returns its slot to the free list.

	pool : list to modify.
	iter : index of a currently linked node; asserted to be within [0, capacity).

	Does nothing when the list is empty. The node's value is reset to the zero value of V_Type.
*/
@(optimization_mode="favor_size")
pool_list_erase :: proc( pool : ^Pool_List($V_Type), iter : Pool_ListIter ) #no_bounds_check
{
	if pool.size <= 0 do return
	assert( iter >= 0 && iter < Pool_ListIter(pool.capacity) )
	assert( len(pool.free_list) == int(pool.capacity - pool.size) )

	iter_node := & pool.items[ iter ]
	prev := iter_node.prev
	next := iter_node.next

	// Bridge the neighbours over the removed node.
	if iter_node.prev != -1 do pool.items[ prev ].next = iter_node.next
	if iter_node.next != -1 do pool.items[ next ].prev = iter_node.prev

	if pool.front == iter do pool.front = iter_node.next
	if pool.back  == iter do pool.back  = iter_node.prev

	iter_node.prev  = -1
	iter_node.next  = -1
	// `{}` is the zero value for any V_Type, so the pool is not limited to numeric payloads.
	iter_node.value = {}

	append( & pool.free_list, iter )
	pool.size -= 1

	if pool.size == 0 {
		pool.back  = -1
		pool.front = -1
	}
}
// Relinks the node at index `iter` as the front of the list. No-op when it already is the front.
// Performs no validation of `iter`; the node is expected to be linked in a non-empty list.
@(optimization_mode="favor_size")
pool_list_move_to_front :: proc "contextless" ( pool : ^Pool_List($V_Type), iter : Pool_ListIter ) #no_bounds_check
{
	if iter == pool.front {
		return
	}

	node   := & pool.items[iter]
	before := node.prev
	after  := node.next

	// Detach the node from its current neighbours.
	if before != -1 {
		pool.items[ before ].next = after
	}
	if after != -1 {
		pool.items[ after ].prev = before
	}
	if iter == pool.back {
		pool.back = before
	}

	// Splice it in ahead of the current front.
	old_front := pool.front
	node.prev = -1
	node.next = old_front
	pool.items[ old_front ].prev = iter
	pool.front = iter
}
// Returns the value stored in the back node without modifying the list.
// Asserts that the list has a back node.
@(optimization_mode="favor_size")
pool_list_peek_back :: #force_inline proc ( pool : Pool_List($V_Type) ) -> V_Type #no_bounds_check {
	back := pool.back
	assert( back != - 1 )
	return pool.items[ back ].value
}
/* Removes the back node from the list and returns its value.

	Returns the zero value of V_Type when the list is empty.
*/
@(optimization_mode="favor_size")
pool_list_pop_back :: #force_inline proc( pool : ^Pool_List($V_Type) ) -> V_Type #no_bounds_check {
	// `{}` (rather than the literal 0) keeps this valid for non-numeric V_Type as well.
	if pool.size <= 0 do return {}
	assert( pool.back != -1 )

	// Read the value first; pool_list_erase resets the node's value.
	value := pool.items[ pool.back ].value
	pool_list_erase( pool, pool.back )
	return value
}
// Table entry of an LRU_Cache.
LRU_Link :: struct #packed {
// The value cached for the key (what lru_get / lru_peek return).
value : i32,
// Index of the key's node inside LRU_Cache.key_queue.
ptr : Pool_ListIter,
}
// Least-recently-used cache mapping Key_Type keys to i32 values.
LRU_Cache :: struct( $Key_Type : typeid ) {
// Maximum number of entries; lru_put evicts once key_queue.size reaches it.
capacity : i32,
// Entry counter maintained by lru_put (incremented on insert, decremented on eviction) and reset by lru_clear.
num : i32,
// Key -> (value, queue node index).
table : map[Key_Type]LRU_Link,
// Recency queue of keys: front is the most recently used, back is the next to be evicted.
key_queue : Pool_List(Key_Type),
}
// Sets the cache's capacity, allocates its lookup table and initializes its recency queue.
// `dbg_name` is forwarded to the queue's pool list.
lru_init :: proc( cache : ^LRU_Cache($Key_Type), capacity : i32, dbg_name : string = "" ) {
	cache.capacity = capacity

	table, alloc_error := make( map[Key_Type]LRU_Link, uint(capacity) )
	cache.table = table
	assert( alloc_error == .None, "VEFontCache.lru_init : Failed to allocate cache's table")

	pool_list_init( & cache.key_queue, capacity, dbg_name = dbg_name )
}
// Releases the recency queue's arrays and then the lookup table.
lru_free :: proc( cache : ^LRU_Cache($Key_Type) ) {
pool_list_free( & cache.key_queue )
delete( cache.table )
}
// Hands the table to reload_map and the recency queue to pool_list_reload, both with `allocator`.
// NOTE(review): reload_map is defined outside this file; presumably it rebinds the map's allocator
// after a hot-reload - confirm against its definition.
lru_reload :: #force_inline proc( cache : ^LRU_Cache($Key_Type), allocator : Allocator ) {
reload_map( & cache.table, allocator )
pool_list_reload( & cache.key_queue, allocator )
}
// Empties the cache: zeroes the entry counter, resets the recency queue and clears the table.
// Capacity and allocated memory are kept.
lru_clear :: proc ( cache : ^LRU_Cache($Key_Type) ) {
	cache.num = 0
	pool_list_clear( & cache.key_queue )
	clear( & cache.table )
}
// Looks `key` up in the table without touching the recency queue.
// Returns the entry and whether it was present. `must_find` is accepted but not used by this procedure.
@(optimization_mode="favor_size")
lru_find :: #force_inline proc "contextless" ( cache : LRU_Cache($Key_Type), key : Key_Type, must_find := false ) -> (LRU_Link, bool) #no_bounds_check {
	entry, found := cache.table[key]
	return entry, found
}
// Returns the value cached for `key` and marks the key as most recently used.
// Returns -1 when the key is not in the cache.
@(optimization_mode="favor_size")
lru_get :: #force_inline proc ( cache: ^LRU_Cache($Key_Type), key : Key_Type ) -> i32 #no_bounds_check {
	entry, found := & cache.table[ key ]
	if ! found {
		return -1
	}
	pool_list_move_to_front( & cache.key_queue, entry.ptr )
	return entry.value
}
// Reports which key the next insertion of a new key would evict.
// Returns ~Key_Type(0) (all bits set) while the cache still has room.
@(optimization_mode="favor_size")
lru_get_next_evicted :: #force_inline proc ( cache : LRU_Cache($Key_Type) ) -> Key_Type #no_bounds_check {
	if cache.key_queue.size < cache.capacity {
		return ~Key_Type(0)
	}
	return pool_list_peek_back( cache.key_queue )
}
// Returns the value cached for `key` without changing its recency, or -1 when the key is absent.
// `must_find` is forwarded to lru_find.
@(optimization_mode="favor_size")
lru_peek :: #force_inline proc "contextless" ( cache : LRU_Cache($Key_Type), key : Key_Type, must_find := false ) -> i32 #no_bounds_check {
	entry, found := lru_find( cache, key, must_find )
	if found {
		return entry.value
	}
	return -1
}
// Inserts or updates `key` with `value` and marks it as most recently used.
// Returns the key that was evicted to make room; returns `key` itself when nothing was evicted
// (either the key already existed or the cache still had room).
@(optimization_mode="favor_size")
lru_put :: proc( cache : ^LRU_Cache($Key_Type), key : Key_Type, value : i32 ) -> Key_Type #no_bounds_check
{
	// profile(#procedure)

	// Known key: bump its recency and overwrite the value in place.
	existing, found := & cache.table[ key ]
	if found {
		pool_list_move_to_front( & cache.key_queue, existing.ptr )
		existing.value = value
		return key
	}

	// New key: drop the least recently used entry first when the queue is full.
	evicted := key
	is_full := cache.key_queue.size >= cache.capacity
	if is_full {
		evicted = pool_list_pop_back( & cache.key_queue )
		delete_key( & cache.table, evicted )
		cache.num -= 1
	}

	pool_list_push_front( & cache.key_queue, key )
	new_link := LRU_Link {
		value = value,
		ptr   = cache.key_queue.front,
	}
	cache.table[key] = new_link
	cache.num += 1
	return evicted
}
/* Re-queues `key` as the most recently used entry.

	cache : cache to modify.
	key   : key to refresh; when it is not in the cache the call is a no-op.

	The key's queue node is erased and pushed again at the front, and the table entry is updated
	(through a pointer, so the write lands in the table) with the node's new index.
*/
lru_refresh :: proc( cache : ^LRU_Cache($Key_Type), key : Key_Type ) {
	link, found := & cache.table[ key ]
	// Without this guard a missing key would erase slot 0 and enqueue a key that has no table entry.
	if ! found do return

	pool_list_erase( & cache.key_queue, link.ptr )
	pool_list_push_front( & cache.key_queue, key )
	link.ptr = cache.key_queue.front
}

View File

@@ -0,0 +1,126 @@
package vefontcache
// There are only 4 actual regions of the atlas. E represents the atlas_decide_region detecting an oversized glyph.
// Note(Ed): None should never really occur anymore. So it's most likely safe to add an assert when it's detected.
Atlas_Region_Kind :: enum u8 {
// Returned by atlas_decide_region when the padded glyph fits neither a region slot nor the glyph buffer.
None = 0x00,
// A through D: the padded glyph fits that region's slot_size. Their values are used as indices into Atlas.regions.
A = 0x01,
B = 0x02,
C = 0x03,
D = 0x04,
// Fits no region slot but does fit the glyph buffer.
E = 0x05,
Ignore = 0xFF, // ve_fontcache_cache_glyph_to_atlas uses a -1 value in clear draw call
}
// Key type for the per-region LRU caches; produced by atlas_glyph_lru_code.
Atlas_Key :: u32
// TODO(Ed) It might perform better with a tailored made hashtable implementation for the LRU_Cache or dedicated array struct/procs for the Atlas.
/* Essentially a sub-atlas of the atlas. There is a state cache per region that tracks the glyph inventory (what slot they occupy).
Unlike the shape cache this one's fixed capacity (naturally) and the next avail slot is tracked.
*/
Atlas_Region :: struct {
// Maps a glyph's Atlas_Key to the slot index it occupies (see atlas_reserve_slot).
state : LRU_Cache(Atlas_Key),
size : Vec2i,
// capacity.x is used by atlas_region_bbox as the number of slots per row.
capacity : Vec2i,
// Added to the slot position by atlas_region_bbox.
offset : Vec2i,
// Dimensions of a single slot.
slot_size : Vec2i,
// Next never-used slot index; atlas_reserve_slot hands these out until state.capacity is reached.
next_idx : i32,
}
/* There are four regions each succeeding region holds larger sized slots.
The generator pipeline for draw lists utilizes the regions array for info lookup.
Note(Ed):
Padding can technically be larger than 1, however recently I haven't had any artifact issues...
size_multiplier usage isn't fully resolved. Intent was to further setup over_sampling or just having
a more massive cache for content that used more than the usual common glyphs.
*/
Atlas :: struct {
region_a : Atlas_Region,
region_b : Atlas_Region,
region_c : Atlas_Region,
region_d : Atlas_Region,
// atlas_decide_region indexes this with kinds 1 ..= 4 (A through D).
// NOTE(review): index 0 is presumably left unused so the enum values can index directly - confirm at the setup site.
regions : [5] ^Atlas_Region,
glyph_padding : f32, // Padding to add to bounds_<width/height>_scaled for choosing which atlas region.
size_multiplier : f32, // Grows all text by this multiple.
size : Vec2i,
}
// Hasher for the atlas.
// Builds the Atlas_Key for a glyph by feeding the bytes of font, glyph_index and px_size
// (in that order) through djb8_hash into the same accumulator.
@(optimization_mode="favor_size")
atlas_glyph_lru_code :: #force_inline proc "contextless" ( font : Font_ID, px_size : f32, glyph_index : Glyph ) -> (lru_code : Atlas_Key) {
// lru_code = u32(glyph_index) + ( ( 0x10000 * u32(font) ) & 0xFFFF0000 )
// Shadow the parameters with locals so their addresses can be taken below.
font := font
glyph_index := glyph_index
px_size := px_size
djb8_hash( & lru_code, to_bytes( & font) )
djb8_hash( & lru_code, to_bytes( & glyph_index ) )
djb8_hash( & lru_code, to_bytes( & px_size ) )
return
}
// Computes the position and size of slot `local_idx` within `region`.
// Slots are laid out row-major with region.capacity.x slots per row; the region's offset is added
// to the position. The size is the region's slot_size.
@(optimization_mode="favor_size")
atlas_region_bbox :: #force_inline proc( region : Atlas_Region, local_idx : i32 ) -> (position, size: Vec2)
{
	column := local_idx % region.capacity.x
	row    := local_idx / region.capacity.x

	position.x = f32( column * region.slot_size.x )
	position.y = f32( row    * region.slot_size.y )
	position.x += f32(region.offset.x)
	position.y += f32(region.offset.y)

	size = vec2(region.slot_size)
	return
}
// Picks the region for a glyph from its scaled bounds plus twice the atlas's glyph_padding.
// Returns the first of regions 1 ..= 4 whose slot_size holds the padded bounds, .E when it only
// fits within glyph_buffer_size, and .None when it fits neither.
@(optimization_mode="favor_size")
atlas_decide_region :: #force_inline proc "contextless" (atlas : Atlas, glyph_buffer_size : Vec2, bounds_size_scaled : Vec2 ) -> (region_kind : Atlas_Region_Kind)
{
	// profile(#procedure)
	padding_both_sides := atlas.glyph_padding * 2
	padded             := bounds_size_scaled + padding_both_sides

	for kind in 1 ..= 4 {
		slot := atlas.regions[kind].slot_size
		fits_slot := padded.x <= f32(slot.x) && padded.y <= f32(slot.y)
		if fits_slot {
			return cast(Atlas_Region_Kind) kind
		}
	}

	fits_buffer := padded.x <= glyph_buffer_size.x && padded.y <= glyph_buffer_size.y
	if fits_buffer {
		return .E
	}
	return .None
}
/* Grab an atlas LRU cache slot.

	region   : region to reserve the slot in.
	lru_code : key of the glyph that will occupy the slot.

	Returns the slot index now associated with lru_code. While the region still has never-used
	slots the next one is handed out; afterwards the slot of the least recently used glyph is reused.
*/
@(optimization_mode="favor_size")
atlas_reserve_slot :: #force_inline proc ( region : ^Atlas_Region, lru_code : Atlas_Key ) -> (atlas_index : i32)
{
	if region.next_idx < region.state.capacity
	{
		evicted := lru_put( & region.state, lru_code, region.next_idx )
		atlas_index = region.next_idx
		region.next_idx += 1
		// Nothing should be evicted while unused slots remain (lru_put returns the key itself then).
		assert( evicted == lru_code )
	}
	else
	{
		next_evict_codepoint := lru_get_next_evicted( region.state )
		// lru_get_next_evicted signals "nothing to evict" with ~Key_Type(0). The key is an Atlas_Key
		// (u32), so the sentinel is 0xFFFFFFFF (LRU_Fail_Mask_32), not the 16-bit mask.
		assert( next_evict_codepoint != ~Atlas_Key(0) )

		// Reuse the slot of the entry about to be evicted.
		atlas_index = lru_peek( region.state, next_evict_codepoint, must_find = true )
		assert( atlas_index != -1 )

		evicted := lru_put( & region.state, lru_code, atlas_index )
		assert( evicted == next_evict_codepoint )
	}

	assert( lru_get( & region.state, lru_code ) != - 1 )
	return
}

View File

@@ -0,0 +1,188 @@
# Interface
## Lifetime
### startup
Initializes a provided context.
There are a large amount of parameters to tune the library instance to the user's preference. By default, keep in mind the library defaults to utilize stb_truetype as the font parser and harfbuzz for the shaper.
Much of the data structures within the context struct are not fixed-capacity allocations so make sure that the backing allocator can handle it.
### hot_reload
The library supports being used in a dynamically loaded module. If it's hot-reloaded, simply make sure to call this procedure with a reference to the backing allocator provided during startup, as all dynamic containers tend to lose a proper reference to the allocator's procedure.
Call `clear_atlas_region_caches` & `clear_shape_cache` to reset the library's shape and glyph cache state to force a re-render.
### shutdown
Release resources from the context.
### clear_atlas_region_caches
Clears the LRU caches of regions A-D of the Atlas & sets their next_idx to 0. Effectively will force a re-cache of all previously rendered glyphs. Shape configuration for the glyph will remain unchanged unless clear_shape_cache is also called.
### clear_shape_cache
Clears the LRU cache of the shaper along with clearing all existing storage entries. Effectively will force a re-cache of previously cached text shapes (Does not recache their rendered glyphs).
### load_font
Will load an instance of a font. The user needs to load the file's bytes themselves; the font entry (Entry :: struct) will be tracked by the library. The user will be given a font_id which is a direct index for the entry in the tracked array.
### unload_font
Will free an entry, (parser and shaper resources also freed)
## Shaping
Ideally the user should track the shapes themselves in a time-scale beyond the per-frame draw call. This avoids having to do caching/lookups of the shape.
### shape_text
Will shape the text using the `shaper_proc` argument (user overloadable). Shape will be cached by the library.
### shape_text_uncached
Will shape the text using the `shaper_proc` argument (user overloadable).
Shape will NOT be cached by the library. Use this if you want to roll your own solution for tracking shapes.
## Draw list generation
### draw_text procedures
There are a total of six procedures, 3 for shapes, 3 for text:
* `draw_shape_normalized_space`
* `draw_shape_view_space`
* `draw_shape`
* `draw_text_normalized_space`
* `draw_text_view_space`
* `draw_text`
The normalized space procedures are the `baseline` interface draw procedures. They expect the position and scale provided to operate with an unsigned normalized space where the bottom left is 0.0, 0.0 and the top right is 1.0, 1.0.
The view space will normalize the position and scale for the user based on the provided view and zoom. The coordinate system is still unsigned just scaled to the view's size.
The non-suffix named procedures use the scope stack to derive the position and scale the user provides a relative position and scale for the text that will be adjusted to the scope's view, position, scale, & zoom.
See the comment above each of the procedures for diagrams.
### get_draw_list
Get the enqueued draw_list (vertices, indices, and draw call arrays) in its entirety.
By default, if get_draw_list is called, it will first call `optimize_draw_list` to optimize the draw list's calls for the user. If this is undesired, make sure to pass `optimize_before_returning = false` in the arguments.
### get_draw_list_layer
Get the enqueued draw_list for the current "layer".
A layer is considered the slice of the `Draw_List`'s content from the last call to `flush_draw_list_layer` onward.
By default, if `get_draw_list_layer` is called, it will first call `optimize_draw_list` for the user to optimize the slice (exclusively) of the draw list's draw calls. If this is undesired, make sure to pass `optimize_before_returning = false` in the arguments.
The draw layer offsets are cleared with `flush_draw_list`
### flush_draw_list
Will clear the draw list and draw layer offsets.
### flush_draw_list_layer
Will update the draw list layer with the latest offset based on the current length of the draw list vertices, indices, and calls arrays.
## Metrics
### measure_shape_size
This provides the shape size scaled down by the ctx.px_scale to get the intended usage size. Size is equivalent to `measure_text_size`.
### measure_text_size
Provides a Vec2 of the width and height occupied by the provided text string. The y is measured to be the largest glyph box bounds height of the text. The width is derived from the `end_cursor_pos` field from a `Shaped_Text` entry.
### get_font_vertical_metrics
A wrapper for `parser_get_font_vertical_metrics`. Will provide the ascent, descent, and line_gap for a font entry.
## Miscellaneous
Stuff used by the draw list generation interface or just getters and setters.
### get_cursor_pos
Will provide the current cursor_pos for the resulting text drawn.
### get_normalized_position_scale
Will normalize the value of the position and scale based on the provided view.
Position will also be snapped to the nearest pixel via ceil.
Does nothing if view is 1 or 0
This is used by draw via view relative space procedures to normalize it to the intended space for the render pass.
### resolve_draw_px_size
Used to constrain the px_size used in `resolve_zoom_size_scale`.
The view relative space and scoping stack-based procedures support zoom. When utilizing zoom there is a nasty jitter that will occur if the user smoothly goes across different font sizes because the spacing can drastically change between even and odd font-sizes. This is applied to enforce the font sticks to a specific interval.
The library uses the context's zoom_px_interval as the reference interval in the draw procedures. It can be set with `set_zoom_px_interval` and the default value is 2.
### resolve_zoom_size_scale
Provides a way to get a "zoom" on the font size and scale, similar conceptually to a canvas UX zoom
Does nothing when zoom is 1.0
Uses `resolve_draw_px_size` to constrain which font size is used for the zoom.
### set_alpha_scalar
This is an artifact feature of the current shader, it *may* be removed in the future... Increasing the alpha of the colour draw with above 1.0 increases the edge contrast of the glyph shape.
For the value to be added to the colour, the alpha of the text must already be at 1.0 or greater.
### set_px_scalar
This is another "super-scalar" applied to rendering glyphs. In each draw procedure the following is computed before passing the values to the shaper and draw list generation passes:
```go
target_px_size := px_size * ctx.px_scalar
target_scale := scale * (1 / ctx.px_scalar)
target_font_scale := parser_scale( entry.parser_info, target_px_size )
```
Essentially, `ctx.px_scalar` is used to upscale the px_size by its value and then downscale the render target scale back to the intended size. Doing so provides better shape positioning and further improves text hinting. The downside is that small text tends to become more jagged (as it's really hitting the limits of how well the shader can blend those edges at that resolution).
This will most likely be preserved with future shader upgrades, however it will most likely not be as necessary as it is right now to achieve crisp text.
### set_zoom_px_interval
Used by draw procedures with `resolve_draw_px_size` & `resolve_zoom_size_scale`. Provides the interval to use when constraining the px_size to a specific set of values when using zoom scaling.
### set_snap_glyph_shape_position
During the shaping pass, the position of each glyph can be rounded up to the integer to (usually) allow better hinting.
### set_snap_glyph_render_height
During the draw list generation pass, the position of each glyph when blitting to atlas can have the quad size rounded up to the integer.
Can yield better hinting but may significantly stretch the glyphs at small scales.
## Scope Stack
These are a set of push & pop pairs of functions that operate on the context's stack containers. They are used with the draw_shape and draw_text procedures. This is mainly for quick scratch usage where the user wants to directly compose a large amount of text without having a UI framework directly handle the text backend.
* font
* font_size
* colour: Linear colour.
* view: Width and height of the 2D area the text will be drawn within.
* position: Uses relative positioning will offset the incoming position by the given amount.
* scale: Uses relative scaling, will scale the procedures incoming scale by the given amount.
* zoom: Affects scaling, will scale the procedure's incoming font size & scale based on an *UX canvas camera's* notion of it.
Procedure types:
* `scope_<stack_option>`: push with a defer pop
* `push_<stack_option>`
* `pop_<stack_option>`

View File

@@ -0,0 +1,234 @@
# Guide: Architecture
Overview of the package design and code-path layout.
---
The purpose of this library is to alleviate four key challenges with one encapsulating package:
* Font parsing
* Text codepoint shaping
* Glyph shape triangulation
* Glyph draw-list generation
Shaping text, getting metrics for glyphs, triangulating glyphs, and anti-aliasing their render are expensive operations to perform per frame. Therefore, any compute operations that can be cached, will be.
There are two cache types used:
* Shape cache (`Shaped_Text_Cache.state`)
* Atlas region cache (`Atlas_Region.state`)
The shape cache stores all data for a piece of text that will be utilized in a draw call that is not dependent on a specific position & scale (and is faster to lookup vs compute per draw call).
The atlas region cache tracks what slots have glyphs rendered to the texture atlas. This essentially caches triangulation and super-sampling computations.
All caching uses [LRU.odin](../vefontcache/LRU.odin)
## Code Paths
### Lifetime
The library lifetime is straightforward: you have a startup procedure that should be called during your usual app initialization. From there you may either choose to manually shut it down or let the OS clean it up.
If hot-reload is desired, you just need to call hot_reload with the context's backing allocator to refresh the procedure references. After the DLL has been reloaded, these should be the only aspects that have been scrambled.
Usually when hot-reloading the library for tuning or major changes, you'd also want to clear the caches. Simply call `clear_atlas_region_caches` & `clear_shape_cache` right after.
Ideally, there should be zero dynamic allocation on a per-frame basis as long as the reserves for the dynamic containers are never exceeded. It's acceptable if they do exceed as their memory locality is so large their distance in the pages to load into CPU cache won't matter - it just needs to be a low incidence.
### Shaping Pass
If using the library's cache, `shaper_shape_text_cached` handles the hashing and lookup. As long as a shape is found, it will not enter the uncached code path. By default, this library uses `shaper_shape_harfbuzz` as the `shape_text_uncached` procedure.
Shapes are cached using the following parameters to hash a key:
* font: Font_ID
* font_size: f32
* the text itself: string
All shapers fulfill the following interface:
```odin
Shaper_Shape_Text_Uncached_Proc :: #type proc( ctx : ^Shaper_Context,
atlas : Atlas,
glyph_buffer_size : Vec2,
font : Font_ID,
entry : Entry,
font_px_Size : f32,
font_scale : f32,
text_utf8 : string,
output : ^Shaped_Text
)
```
Which will resolve the output `Shaped_Text`. It has the following definition:
```odin
Shaped_Text :: struct #packed {
glyph : [dynamic]Glyph,
position : [dynamic]Vec2,
visible : [dynamic]i16,
atlas_lru_code : [dynamic]Atlas_Key,
region_kind : [dynamic]Atlas_Region_Kind,
bounds : [dynamic]Range2,
end_cursor_pos : Vec2,
size : Vec2,
font : Font_ID,
px_size : f32,
}
```
The result of the shaping process is the glyphs and their positions for the shape; historically resembling what's known as a *Slug* of prepared text for printing. The end position of where the user's "cursor" would be is also recorded, which provides the end position of the shape. The size of the shape is also resolved here, which if using px_scalar must be downscaled. `measure_shape_size` does the downscaling for the user.
`visible` tracks which of the glyphs will actually be relevant for the draw_list pass. This is to avoid a conditional jump during the draw list gen pass. When accessing glyph or position during the draw_list gen, they will use visible's relative index.
The font and px_size are tracked here as well so the user does not need to provide them to the library's interface and related.
As stated under the main heading of this guide, the following are within shaped text so that they may be resolved outside of the draw list generation (see: `generate_shape_draw_list`):
* atlas_lru_code
* region_kind
* bounds
These are the same length as the `visible` array, so indexing those will not need to use visible's relative index.
`shaper_shape_text_latin` does naive shaping by utilizing the codepoint's kern_advance and detecting newlines.
`shaper_shape_harfbuzz` is an actual shaping *engine*. Here is the general idea of how the library utilizes it for shaping:
1. Reset the state of the hb_buffer
2. Determine the line height
3. Go through the codepoints: (for each)
1. Determine the codepoint's script
2. If the script is neutral (Unknown, Inherited, or of Common type), the script has not changed, or this is the first codepoint of the shape we can add the codepoint to the buffer.
3. Otherwise we will have to start a shaping run if we do encounter a significant script change. After, we can add the codepoint to the post-run-cleared hb_buffer.
4. This continues until all codepoints have been processed.
4. We do a final shape run after iterating to make sure all codepoints have been processed.
5. Set the size of the shape: X is max line width, Y is line height multiplied by the line count.
6. Resolve the atlas_lru_code, region_kind, and bounds for all visible glyphs
7. Store the font and px_size information.
The `shape_run` procedure within does the following:
1. Setup the buffer for the batch
2. Have harfbuzz shape the buffer
3. Extract glyph infos and positions from the buffer.
4. Iterate through all glyphs
1. If the hb_glyph cluster is > 0, we need to treat it as the indication of a newline glyph. ***(We update position and skip)***
2. Update positioning and other metrics and append output shape's glyph and position.
3. If the glyph is visible we append it to shape's visible (harfbuzz must specify it as not .nodef, and parser must identify it as non-empty)
5. We update the output.end_cursor_pos with the last position processed by the iteration
6. Clear the hb_buffer's contents to prepare for a possible upcoming shape run.
**Note on shape_run.4: The iteration doesn't preserve tracking the clusters, so that information is lost.**
*In the future cluster tracking may be added if its found to be important for high level text features beyond rendering.*
**Note on shape_run.4.1: Don't know if the glyph signifying newline should be preserved**
See [Harfbuzz documentation](https://harfbuzz.github.io) for additional information.
There are other shapers out there:
* [hamza](https://github.com/saidwho12/hamza): A notable C library that could be setup with bindings.
***Note: Monospace fonts may have a much more trivial shaper (however for fonts with ligatures this may not be the case)***
***They should only need the kern advance of a single glyph as they're all the same. ligatures (I believe) should preserve this kern advance.***
### Draw List Generation
All interface draw text procedures will ultimately call `generate_shape_draw_list`. If the draw procedure is given text, it will call `shaper_shape_text_cached` the text immediately before calling it.
Its implementation uses a batched-pipeline approach where its goal is to populate three arrays behaving as queues:
* oversized: For drawing oversized glyphs
* to_cache: For glyphs that need triangulation & rendering to glyph buffer then blitting to atlas.
* cache: For glyphs that are already cached in the atlas and just need to be blit to the render target.
It then sends those off to `batch_generate_glyphs_draw_list` for further actual generation to be done. The size of a batch is determined by the capacity of the glyph_buffer's `batch_cache`. This can be set in `glyph_draw_params` for startup.
`glyph_buffer.glyph_pack` is utilized by both `generate_shape_draw_list` and `batch_generate_glyphs_draw_list` to store various computed data in an SOA data structure for the glyphs.
generate_shape_draw_list outline:
1. Prepare glyph_pack, oversized, to_cache, cached, and reset the batch cache
* `glyph_pack` is resized to the length of `shape.visible`
* The other arrays populated have their reserved set to that length as well (they will not bounds check capacity on append)
2. Iterate through the shape.visible and resolve glyph_pack's positions.
3. Iterate through shape.visible this time for final region resolution and segregation of glyphs to their appropriate queue.
1. If the glyph's assigned region is `.E` it's oversized. The `oversample` used for rendering to render target will either be 2x or 1x depending on how huge it is.
2. The following glyphs are checked to see if their assigned region has the glyph `cached`.
1. If it does, its just appended to cached and marked as seen in the `batch_cache`.
2. If it doesn't then a slot is reserved within the atlas's region and the glyph is appended to `to_cache`.
3. For either case the atlas_region_bbox is computed.
3. After a batch has been resolved, `batch_generate_glyphs_draw_list` is called.
4. If there is a partially filled batch (the usual case), batch_generate_glyphs_draw_list will be called for it.
5. The cursor_pos is updated with the shape's end cursor position adjusted for the target space.
batch_generate_glyphs_draw_list outline:
The batch is organized into three major stages:
1. glyph transform & draw quads compute
2. glyph_buffer draw list generation (`oversized` & `to_cache`)
3. blit-from-atlas to render target draw list generation (`to_cache` & `cached`)
Glyph transform & draw quads compute does an iteration for each of the 3 arrays.
Nearly all the math for all three is done there *except* for `to_cache`, which does its blitting compute in its glyph_buffer draw-list gen pass.
glyph_buffer draw list generation paths for `oversized` and `to_cache` are unique to each.
For `oversized`:
1. Allocate glyph shapes
2. Iterate oversized:
1. Flush the glyph buffer if flagged to do so (reached glyph allocation limit)
2. Call `generate_glyph_pass_draw_list` for triangulation and rendering to buffer.
3. blit quad.
3. flush the glyph buffer's draw list.
4. free glyph shapes
For `to_cache`:
1. Allocate glyph shapes
2. Iterate to_cache:
1. Flush the glyph buffer if flagged to do so (reached glyph allocation limit)
2. Compute & blit quads for clearing the atlas region and blitting from the buffer to the atlas.
3. Call `generate_glyph_pass_draw_list` for triangulation and rendering to buffer.
3. flush the glyph buffer's draw list.
4. free glyph shapes
5. Do blits from atlas to draw list.
`cached` only needs to blit from the atlas to the render target.
`generate_glyph_pass_draw_list`: sets up the draw call for glyph to the glyph buffer. Currently it also handles triangulation as well. For now the shape triangulation is rudimentary and uses triangle fanning. Eventually it would be nice to offer alternative modes that can be specified on a per-font basis.
`flush_glyph_buffer_draw_list`: Will merge the draw_lists contents of the glyph buffer over to the library's general draw_list, then clear the buffer's draw lists.
### On Layering
The base draw list generation pipeline provided by the library allows the user to batch whatever they want into a single "layer".
However, the user most likely would want to take into consideration: font instances, font size, colors; these are things that may benefit from having shared locality during a layer batch. Overlapping text benefits from the user handling the ordering via layers.
Layers (so far) are just a set of offsets tracked by the library's `Context.draw_layer` struct. When `flush_draw_list_layer` is called, the offsets are set to the current length of the draw list. This allows the rendering backend to retrieve the latest set of vertices, indices, and calls to render on a per-layer basis with: `get_draw_list_layer`.
Importantly, this leads to the following pattern when enqueuing a layer to render:
1. Begin render pass
2. For codepath that will deal with text layers
1. Process user-level code-path that calls the draw text interface, populating the draw list layer (usually a for loop)
2. After iteration on the layer is complete, render the text layer
1. grab the draw list layer
2. flush the layer so the draw list offsets are reset
3. Repeat until all layers for the codepath are exhausted.
There is consideration to instead explicitly have a draw list with more contextual information of the start and end of each layer. So that batching can be orchestrated in an isolated section of their pipeline.
This would involve just tracking *slices* of their draw-list that represents layers:
```odin
Draw_List_Layer :: struct {
vertices : []Vertex,
indices : []u32,
calls : []Draw_Call,
}
```
Eventually the library may provide this since adding that feature is relatively cheap and a low line-count addition to the interface.
There should be little to no performance loss from doing so as the iteration size is too large of a surface area to matter (so it's just pipeline ergonomics)

View File

@@ -0,0 +1,68 @@
# Guide: Backend
The end-user needs to adapt this library to hook into their own codebase. For reference, they can check the [examples](../examples/) and [backend](../backend/) directories for working code that demonstrates what this guide covers.
When rendering text, users need to handle two main aspects: the text to draw and its "layering". Similar to UIs, text should be drawn in layer batches, where each layer can represent a pass with arbitrary distinctions from other layers.
The following components are required:
* Vertex and Index Buffers for glyph meshes
* Glyph shader for rendering glyphs to the glyph buffer
* Atlas shader for blitting upscaled glyph quads from the glyph buffer to an atlas region slot (downsampled)
* "Screen or Target" shader for blitting glyph quads from the atlas to a render target or swapchain
* The glyph, atlas, and target image buffers
Currently, the library doesn't support sub-pixel AA, so we're only rendering to R8 images.
## Rendering Passes
There are four passes that need to be handled when rendering a draw list:
* Glyph: Rendering a glyph mesh to the glyph buffer
* Atlas: Blitting a glyph quad from the glyph buffer to an atlas slot
* Target: Blitting from the atlas image to the target image
* Target_Uncached: Blitting from the glyph buffer image to the target image
The Target & Target_Uncached passes can technically be handled in the same case. The user just needs to swap between using the atlas image and the glyph buffer image. This is how the backend_sokol.odin's `render_text_layer` has these passes set up.
## Vertex Buffer Layout
The vertex buffer has the following layout for all passes:
* `[2]f32` for positions
* `[2]f32` for texture coords (Offset is naturally `[2]f32`)
* Total stride: `[4]f32`
---
The index buffer is a simple u32 stream.
For quad mesh layout details, see `blit_quad` in [draw.odin](../vefontcache/draw.odin).
For glyph shape triangulation meshes, the library currently only uses a triangle fanning technique, implemented in `fill_path_via_fan_triangulation` within [draw.odin](../vefontcache/draw.odin). Eventually, the library will support other modes on a per-font basis.
## UV Coordinate Conventions (GLSL vs HLSL)
DirectX, Metal, and Vulkan consider the top-left corner as (0, 0), where the Y axis increases downward (traditional screenspace). This library follows OpenGL's convention, where (0, 0) is at the bottom-left (Y goes up).
Adjust the UV coordinates in your shader accordingly:
```c
#if !OpenGL
uv = vec2(v_texture.x, 1.0 - v_texture.y);
#else
uv = vec2(v_texture.x, v_texture.y);
#endif
```
Eventually, the library will support both conventions as a comp-time conditional.
## Retrieving & Processing the layer
`get_draw_list_layer` will provide the layer's vertex, index, and draw call slices. Unless the default is overwritten, it will call `optimize_draw_list` before returning the slices (profile to see what's better for your use case).
Once those are retrieved, call `flush_draw_list_layer` to update the layer offsets tracked by the library's `Context`.
The vertex and index slices just need to be appended to your backend's vertex and index buffers.
The draw calls need to be iterated with a switch statement for the aforementioned pass types. Within each case you can construct and enqueue the corresponding pass.
---

View File

@@ -0,0 +1,17 @@
Vertex Engine GPU Font Cache
Copyright 2020 Xi Chen
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

855
code2/vefontcache/draw.odin Normal file
View File

@@ -0,0 +1,855 @@
package vefontcache
/*
Note(Ed): This may be separated in the future into another file depending on how much is involved with supporting ear-clipping triangulation.
*/
// import "thirdparty:freetype"
// Selects how a glyph outline is converted into a triangle mesh.
// NOTE(review): the only triangulation implemented in this file is fan triangulation
// (fill_path_via_fan_triangulation); Ear_Clipping looks like a placeholder for future work — confirm.
Glyph_Trianglation_Method :: enum(i32) {
Ear_Clipping,
Triangle_Fanning,
}
// A single mesh vertex as stored in a Draw_List: a 2D position followed by texture coordinates (u, v).
Vertex :: struct {
pos : Vec2,
u, v : f32,
}
// 2x2 f32 matrix type.
// NOTE(review): not referenced by any procedure in the visible part of this file — confirm it is still used.
Glyph_Bounds_Mat :: matrix[2, 2] f32
// Describes one blit: where a glyph quad lands (dst_*) and which rectangle it samples from (src_*).
// Consumers pass (dst_pos, dst_pos + dst_scale) and (src_pos, src_pos + src_scale) to blit_quad.
Glyph_Draw_Quad :: struct {
dst_pos : Vec2, // Destination corner of the quad
dst_scale : Vec2, // Destination extent of the quad
src_pos : Vec2, // Source corner used as the quad's first UV
src_scale : Vec2, // Source extent added to src_pos for the opposite UV corner
}
// This is used by generate_shape_draw_list & batch_generate_glyphs_draw_list
// to track relevant glyph data in soa format for pipelined processing
// One entry exists per visible glyph of the shape being processed (the pack is resized to len(shape.visible)).
Glyph_Pack_Entry :: struct #packed {
position : Vec2, // target_position + shape.position[vis_id] * target_scale
atlas_index : i32, // Slot index within the atlas region (result of lru_get / atlas_reserve_slot)
region_pos : Vec2, // Filled from atlas_region_bbox for the glyph's slot
region_size : Vec2, // Filled from atlas_region_bbox for the glyph's slot
over_sample : Vec2, // Only used for oversized glyphs
shape : Parser_Glyph_Shape, // Outline fetched via parser_get_glyph_shape; freed with parser_free_shape after use
draw_transform : Transform, // Translate/scale handed to generate_glyph_pass_draw_list
draw_quad : Glyph_Draw_Quad, // Quad used when blitting the glyph to the target
buffer_x : f32, // Horizontal offset of the glyph inside the glyph buffer (0 when a flush is required first)
flush_glyph_buffer : b8, // Set when the glyph buffer must be flushed before this glyph is rendered
}
// A single draw command referencing a contiguous range of a Draw_List's index buffer.
Draw_Call :: struct {
pass : Frame_Buffer_Pass, // Which render pass this call belongs to
start_index : u32, // First index (into Draw_List.indices) covered by the call
end_index : u32, // One past the last index covered by the call
clear_before_draw : b32, // Request that the pass target be cleared before drawing
region : Atlas_Region_Kind,
colour : RGBAN,
}
// Baseline Draw_Call value that call sites copy and then specialize:
// no pass, empty index range, no clear, region A, opaque white colour.
Draw_Call_Default :: Draw_Call {
pass = .None,
start_index = 0,
end_index = 0,
clear_before_draw = false,
region = .A,
colour = { 1.0, 1.0, 1.0, 1.0 }
}
// Growable geometry + command buffers produced by the draw-list generators in this file.
// Draw_Call.start_index / end_index refer to positions in `indices`; `indices` refer to positions in `vertices`.
Draw_List :: struct {
vertices : [dynamic]Vertex,
indices : [dynamic]u32,
calls : [dynamic]Draw_Call,
}
// Identifies which render target / shader pass a Draw_Call is meant for.
Frame_Buffer_Pass :: enum u32 {
None = 0,
Glyph = 1, // Operations on glyph buffer render target
Atlas = 2, // Operations on atlas render target
Target = 3, // Operations on user's end-destination render target using atlas
Target_Uncached = 4, // Operations on user's end-destination render target using glyph buffer
}
// Tracks which atlas keys have been seen during the current batch in generate_shape_draw_list.
Glyph_Batch_Cache :: struct {
table : map[Atlas_Key]b8, // Set of atlas keys marked as seen in this batch
num : i32, // Number of glyphs marked since the last reset (incremented on every mark, not per unique key)
cap : i32, // Batch is dispatched once num reaches this value
}
// The general tracker for a generator pipeline
// Holds the glyph buffer configuration, its pending draw lists, and the scratch arrays reused between shapes.
Glyph_Draw_Buffer :: struct{
over_sample : Vec2, // Over-sampling factor applied to non-oversized glyphs
size : Vec2i, // Dimensions of the glyph buffer
draw_padding : f32, // Padding added around glyphs when rendered into the glyph buffer
snap_glyph_height : f32, // Multiplier used when snapping blit heights (see to_cache pass)
snap_glyph_width : f32, // Multiplier used when snapping blit widths (see to_cache pass)
allocated_x : i32, // Space used (horizontally) within the glyph buffer
clear_draw_list : Draw_List, // Pending atlas-region clear quads, merged on flush
draw_list : Draw_List, // Pending glyph-buffer-to-atlas blit quads, merged on flush
batch_cache : Glyph_Batch_Cache,
shape_gen_scratch : [dynamic]Vertex, // Used during triangulating a glyph into a mesh.
glyph_pack : #soa[dynamic]Glyph_Pack_Entry,
oversized : [dynamic]i32, // glyph_pack ids of glyphs drawn uncached via the glyph buffer
to_cache : [dynamic]i32, // glyph_pack ids of glyphs that must be rendered into the atlas first
cached : [dynamic]i32, // glyph_pack ids of glyphs already present in the atlas
}
// Appends a two-triangle quad to draw_list for blitting a source rectangle (uv0 .. uv1)
// onto a destination rectangle (p0 .. p1).
// Vertex order: (p0.x, p0.y), (p0.x, p1.y), (p1.x, p0.y), (p1.x, p1.y).
// Index order : triangles (0, 1, 2) and (2, 1, 3), offset by the vertex count present before the call.
@(optimization_mode="favor_size")
blit_quad :: #force_inline proc ( draw_list : ^Draw_List,
	p0  : Vec2 = {0, 0},
	p1  : Vec2 = {1, 1},
	uv0 : Vec2 = {0, 0},
	uv1 : Vec2 = {1, 1}
)
{
	// Indices are absolute within the draw list, so remember where this quad's vertices begin.
	base := cast(u32) len(draw_list.vertices)

	append( & draw_list.vertices,
		Vertex { pos = {p0.x, p0.y}, u = uv0.x, v = uv0.y },
		Vertex { pos = {p0.x, p1.y}, u = uv0.x, v = uv1.y },
		Vertex { pos = {p1.x, p0.y}, u = uv1.x, v = uv0.y },
		Vertex { pos = {p1.x, p1.y}, u = uv1.x, v = uv1.y },
	)

	append( & draw_list.indices,
		base + 0, base + 1, base + 2,
		base + 2, base + 1, base + 3,
	)
}
// Builds a triangle fan that fills the closed outline described by `path`.
// Every path vertex is transformed by (pos * scale + translate) and appended to draw_list,
// followed by one extra vertex for `outside_point` (same transform, uv = 0) which acts as the fan's hub.
// One triangle (hub, path[i-1], path[i]) is emitted for each consecutive pair of path vertices.
@(optimization_mode="favor_size")
fill_path_via_fan_triangulation :: proc( draw_list : ^Draw_List,
	outside_point : Vec2,
	path          : []Vertex,
	scale         := Vec2 { 1, 1 },
	translate     := Vec2 { 0, 0 }
) #no_bounds_check
{
	first_path_vertex := cast(u32) len(draw_list.vertices)

	for src in path {
		transformed := src
		transformed.pos = src.pos * scale + translate
		append( & draw_list.vertices, transformed )
	}

	// The hub is stored right after the path vertices.
	hub := cast(u32) len(draw_list.vertices)
	append( & draw_list.vertices, Vertex { pos = outside_point * scale + translate, u = 0, v = 0 } )

	point_count := cast(u32) len(path)
	for next : u32 = 1; next < point_count; next += 1 {
		append( & draw_list.indices,
			hub,
			first_path_vertex + next - 1,
			first_path_vertex + next,
		)
	}
}
// Glyph triangulation generator.
// Walks the edges of glyph_shape, flattening curves into line segments inside `path` (used as scratch),
// and emits one triangle fan per contour into draw_list. A single Glyph-pass Draw_Call covering all of the
// generated indices is appended when at least one index was produced.
// * curve_quality : number of segments each quadratic/cubic curve is subdivided into.
// * bounds        : glyph bounds; the fan hub is placed at a fixed offset from bounds.p0.
// * translate, scale : transform applied to every generated vertex.
@(optimization_mode="favor_size")
generate_glyph_pass_draw_list :: proc(draw_list : ^Draw_List, path : ^[dynamic]Vertex,
	glyph_shape      : Parser_Glyph_Shape,
	curve_quality    : f32,
	bounds           : Range2,
	translate, scale : Vec2
) #no_bounds_check
{
	profile(#procedure)

	fan_hub := Vec2{bounds.p0.x - 21, bounds.p0.y - 33}

	glyph_call := Draw_Call_Default
	glyph_call.pass        = Frame_Buffer_Pass.Glyph
	glyph_call.start_index = u32(len(draw_list.indices))

	clear(path)
	alpha_step := 1.0 / curve_quality

	for edge in glyph_shape
	{
		#partial switch edge.type
		{
		case .Move:
			// A move starts a new contour: emit the fan for the contour gathered so far.
			if len(path) > 0 {
				fill_path_via_fan_triangulation( draw_list, fan_hub, path[:], scale, translate)
				clear(path)
			}
			fallthrough

		case .Line:
			append( path, Vertex { pos = Vec2 { f32(edge.x), f32(edge.y)} } )

		case .Curve:
			assert(len(path) > 0)
			start   := path[ len(path) - 1].pos
			control := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) }
			end     := Vec2{ f32(edge.x), f32(edge.y) }

			for segment : f32 = 1; segment <= curve_quality; segment += 1 {
				alpha := segment * alpha_step
				append( path, Vertex { pos = eval_point_on_bezier3(start, control, end, alpha) } )
			}

		case .Cubic:
			assert( len(path) > 0)
			start     := path[ len(path) - 1].pos
			control_a := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) }
			control_b := Vec2{ f32(edge.contour_x1), f32(edge.contour_y1) }
			end       := Vec2{ f32(edge.x), f32(edge.y) }

			for segment : f32 = 1; segment <= curve_quality; segment += 1 {
				alpha := segment * alpha_step
				append( path, Vertex { pos = eval_point_on_bezier4(start, control_a, control_b, end, alpha) } )
			}
		}
	}

	// Emit the final contour, which has no trailing .Move to trigger it.
	if len(path) > 0 {
		fill_path_via_fan_triangulation(draw_list, fan_hub, path[:], scale, translate)
	}

	glyph_call.end_index = u32(len(draw_list.indices))
	if glyph_call.end_index > glyph_call.start_index {
		append( & draw_list.calls, glyph_call)
	}
}
// Thin wrapper over generate_shape_draw_list for handling an array of shapes.
// Each shape is generated against the context's draw list, atlas and glyph buffer;
// ctx.cursor_pos ends up holding the cursor position returned for the last shape.
generate_shapes_draw_list :: #force_inline proc ( ctx : ^Context,
	font       : Font_ID,
	colour     : RGBAN,
	entry      : Entry,
	px_size    : f32,
	font_scale : f32,
	position   : Vec2,
	scale      : Vec2,
	shapes     : []Shaped_Text
)
{
	assert(len(shapes) > 0)

	for shape_id := 0; shape_id < len(shapes); shape_id += 1
	{
		ctx.cursor_pos = {}

		end_cursor := generate_shape_draw_list( & ctx.draw_list, shapes[shape_id], & ctx.atlas, & ctx.glyph_buffer, ctx.px_scalar,
			colour,
			entry,
			px_size,
			font_scale,
			position,
			scale,
		)
		ctx.cursor_pos = end_cursor
	}
}
/* Generator pipeline for shapes
This procedure has no awareness of layers. That should be handled by a higher-order codepath.
Pipeline order:
* Resolve the glyph's position offset from the target position
* Segregate the glyphs into three slices: oversized, to_cache, cached.
* If oversized is not necessary for your use case and you're hitting a bottleneck, omit it by setting ENABLE_OVERSIZED_GLYPHS to false.
* The segregation will not allow slices to exceed the batch_cache capacity of the glyph_buffer (configurable within startup params)
* When the capacity is reached batch_generate_glyphs_draw_list will be called which will do further compute and then finally draw_list generation.
* This may perform better with smaller shapes vs larger shapes, but having more shapes has a cache lookup penalty (if done per frame) so keep that in mind.
Returns cursor_pos = target_position + shape.end_cursor_pos * target_scale.
*/
generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text,
atlas : ^Atlas,
glyph_buffer : ^Glyph_Draw_Buffer,
px_scalar : f32,
colour : RGBAN,
entry : Entry,
px_size : f32,
font_scale : f32,
target_position : Vec2,
target_scale : Vec2,
) -> (cursor_pos : Vec2) #no_bounds_check
{
profile(#procedure)
// Records a glyph's atlas key as seen in the current batch; num counts every call, not unique keys.
mark_glyph_seen :: #force_inline proc "contextless" ( cache : ^Glyph_Batch_Cache, lru_code : Atlas_Key ) {
cache.table[lru_code] = true
cache.num += 1
}
// Empties the seen-table and restarts the batch counter.
reset_batch :: #force_inline proc( cache : ^Glyph_Batch_Cache ) {
clear_map( & cache.table )
cache.num = 0
}
atlas_glyph_pad := atlas.glyph_padding
atlas_size := vec2(atlas.size)
glyph_buffer_size := vec2(glyph_buffer.size)
// Make sure the packs are large enough for the shape
glyph_pack := & glyph_buffer.glyph_pack
oversized := & glyph_buffer.oversized
to_cache := & glyph_buffer.to_cache
cached := & glyph_buffer.cached
resize_soa_non_zero(glyph_pack, len(shape.visible))
profile_begin("batching & segregating glyphs")
// We do any reservation up front as appending to the array's will not check.
reserve(oversized, len(shape.visible))
reserve(to_cache, len(shape.visible))
reserve(cached, len(shape.visible))
clear(oversized)
clear(to_cache)
clear(cached)
reset_batch( & glyph_buffer.batch_cache)
// Unchecked append: bumps the raw length and writes the last slot. Relies on the reserve() calls above for capacity.
append_sub_pack :: #force_inline proc ( pack : ^[dynamic]i32, entry : i32 )
{
raw := cast(^Raw_Dynamic_Array) pack
raw.len += 1
pack[len(pack) - 1] = entry
}
sub_slice :: #force_inline proc "contextless" ( pack : ^[dynamic]i32) -> []i32 { return pack[:] }
profile_begin("translate")
for & glyph, index in glyph_pack {
// Throughout the draw list generation vis_id will need to be used over index as
// not all glyphs or positions for the shape are visibly rendered.
vis_id := shape.visible[index]
glyph.position = target_position + (shape.position[vis_id]) * target_scale
}
profile_end()
for & glyph, index in glyph_pack
{
// atlas_lru_code, region_kind, and bounds are all 1:1 with shape.visible
atlas_key := shape.atlas_lru_code[index]
region_kind := shape.region_kind[index]
bounds := shape.bounds[index]
bounds_size_scaled := size(bounds) * font_scale
assert(region_kind != .None, "FAILED TO ASSGIN REGION")
when ENABLE_OVERSIZED_GLYPHS
{
// Region .E glyphs bypass the atlas: they are queued as oversized and are not counted in batch_cache.num.
// They get 2x over-sampling only if the scaled bounds fit within half of the glyph buffer in both axes.
if region_kind == .E
{
glyph.over_sample = \
bounds_size_scaled.x <= glyph_buffer_size.x / 2 &&
bounds_size_scaled.y <= glyph_buffer_size.y / 2 ? \
{2.0, 2.0} \
: {1.0, 1.0}
append_sub_pack(oversized, cast(i32) index)
continue
}
}
glyph.over_sample = glyph_buffer.over_sample
region := atlas.regions[region_kind]
glyph.atlas_index = lru_get( & region.state, atlas_key )
// Glyphs are prepared in batches based on the capacity of the batch cache.
// Leaving this labelled block via `break` falls through to the batch dispatch below;
// reaching its end via `continue` moves on to the next glyph without dispatching.
Prepare_For_Batch:
{
pack := cached
found_take_slow_path : b8
success : bool
// Determine if we hit the limit for this batch.
// NOTE(review): -1 is presumably lru_get's "not found" result — confirm against LRU.odin.
if glyph.atlas_index == - 1
{
// Check to see if we reached capacity for the atlas
// NOTE(review): if next_idx never exceeds capacity (e.g. it stops at == capacity) this branch is unreachable — confirm against atlas_reserve_slot.
if region.next_idx > region.state.capacity
{
// We will evict LRU. We must predict which LRU will get evicted, and if it's something we've seen then we need to take slowpath and flush batch.
next_evict_glyph := lru_get_next_evicted( region.state )
found_take_slow_path, success = glyph_buffer.batch_cache.table[next_evict_glyph]
// NOTE(review): this assert fires whenever the predicted eviction key is NOT in the batch table, which looks like the common case — verify intent.
assert(success != false)
// TODO(Ed): This might not be needed with the new pipeline/batching
// NOTE(review): breaking here dispatches the batch before the current glyph was appended to any pack,
// and the loop then advances to the next index, so the current glyph appears to be skipped — verify.
if (found_take_slow_path) {
break Prepare_For_Batch
}
}
// profile_begin("glyph needs caching")
glyph.atlas_index = atlas_reserve_slot(region, atlas_key)
pack = to_cache
// profile_end()
}
// profile("append cached")
glyph.region_pos, glyph.region_size = atlas_region_bbox(region ^, glyph.atlas_index)
mark_glyph_seen(& glyph_buffer.batch_cache, atlas_key)
append_sub_pack(pack, cast(i32) index)
// TODO(Ed): This might not be needed with the new pipeline/batching
// if (found_take_slow_path) {
// break Prepare_For_Batch
// }
if glyph_buffer.batch_cache.num >= glyph_buffer.batch_cache.cap do break Prepare_For_Batch
continue
}
// Batch has been prepared for a set of glyphs time to generate glyphs.
batch_generate_glyphs_draw_list( draw_list, shape, glyph_pack, sub_slice(cached), sub_slice(to_cache), sub_slice(oversized),
atlas,
glyph_buffer,
atlas_size,
glyph_buffer_size,
entry,
colour,
font_scale,
target_scale
)
// Start a fresh batch: forget the seen keys and empty all three sub-packs.
reset_batch( & glyph_buffer.batch_cache)
clear(oversized)
clear(to_cache)
clear(cached)
}
profile_end()
// Oversized glyphs are not counted in batch_cache.num, hence the separate length check.
if len(oversized) > 0 || glyph_buffer.batch_cache.num > 0
{
// Last batch pass
batch_generate_glyphs_draw_list( draw_list, shape, glyph_pack, sub_slice(cached), sub_slice(to_cache), sub_slice(oversized),
atlas,
glyph_buffer,
atlas_size,
glyph_buffer_size,
entry,
colour,
font_scale,
target_scale,
)
}
cursor_pos = target_position + shape.end_cursor_pos * target_scale
return
}
/*
The glyphs types have been segregated by this point into a batch slice of indices to the glyph_pack
The transform and draw quads are computed first (getting the math done in one spot as possible)
Some of the math from to_cache pass for glyph generation was not moved over (it could be but I'm not sure its worth it)
Order : Oversized first, then to_cache, then cached.
Important: These slices store ids for glyph_pack which matches shape.visible in index.
shape.position and shape.glyph DO NOT.
There are only two places this matters for: getting glyph shapes when doing glyph pass generation for oversized and to_cache iterations.
Oversized and to_cache will both enqueue operations for rendering glyphs to the glyph buffer render target.
The compute section will have operations regarding how many glyphs they may allocate before a flush must occur.
A flush will force one of the following:
* Oversized will have a draw call setup to blit directly from the glyph buffer to the target.
* to_cache will blit the glyphs rendered from the buffer to the atlas.
*/
@(optimization_mode="favor_size")
batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
shape : Shaped_Text,
glyph_pack : ^#soa[dynamic]Glyph_Pack_Entry,
cached : []i32,
to_cache : []i32,
oversized : []i32,
atlas : ^Atlas,
glyph_buffer : ^Glyph_Draw_Buffer,
atlas_size : Vec2,
glyph_buffer_size : Vec2,
entry : Entry,
colour : RGBAN,
font_scale : Vec2,
target_scale : Vec2,
) #no_bounds_check
{
profile(#procedure)
// Local copy so ENABLE_DRAW_TYPE_VISUALIZATION can tint each glyph category without touching the caller's value.
colour := colour
profile_begin("glyph transform & draw quads compute")
// Compute pass 1/3: glyphs already in the atlas only need the atlas -> target quad.
for id, index in cached
{
// Quad to for drawing atlas slot to target
glyph := & glyph_pack[id]
bounds := shape.bounds[id]
bounds_scaled := mul(bounds, font_scale)
glyph_scale := size(bounds_scaled) + atlas.glyph_padding
quad := & glyph.draw_quad
quad.dst_pos = glyph.position + (bounds_scaled.p0) * target_scale
quad.dst_scale = (glyph_scale) * target_scale
quad.src_scale = (glyph_scale)
quad.src_pos = (glyph.region_pos)
to_target_space( & quad.src_pos, & quad.src_scale, atlas_size )
}
// Compute pass 2/3: glyphs that must first be rendered to the glyph buffer and blitted into the atlas.
// NOTE(review): allocated_x is only reset by flush_glyph_buffer_draw_list (called in the generation passes below),
// so within this loop every glyph after the first one flagged for a flush will also be flagged — confirm this is intended.
for id, index in to_cache
{
glyph := & glyph_pack[id]
bounds := shape.bounds[id]
bounds_scaled := mul(bounds, font_scale)
glyph_scale := size(bounds_scaled) + glyph_buffer.draw_padding
f32_allocated_x := cast(f32) glyph_buffer.allocated_x
// Resolve how much space this glyph will allocate in the buffer
buffer_size := glyph_scale * glyph_buffer.over_sample
// Allocate a glyph glyph render target region (FBO)
to_allocate_x := buffer_size.x + 4.0
// If allocation would exceed buffer's bounds the buffer must be flush before this glyph can be rendered.
glyph.flush_glyph_buffer = i32(f32_allocated_x + to_allocate_x) >= i32(glyph_buffer_size.x)
// Branchless select: buffer_x is 0 when a flush is required, otherwise the current allocation offset.
glyph.buffer_x = f32_allocated_x * f32( i32( ! glyph.flush_glyph_buffer ) )
// The glyph buffer space transform for generate_glyph_pass_draw_list
draw_transform := & glyph.draw_transform
draw_transform.scale = font_scale * glyph_buffer.over_sample
draw_transform.pos = -1 * (bounds.p0) * draw_transform.scale + glyph_buffer.draw_padding
draw_transform.pos.x += glyph.buffer_x
to_glyph_buffer_space( & draw_transform.pos, & draw_transform.scale, glyph_buffer_size )
// Allocate the space
glyph_buffer.allocated_x += i32(to_allocate_x)
// Quad to for drawing atlas slot to target (used in generate_cached_draw_list)
draw_quad := & glyph.draw_quad
// Destination (draw_list's target image)
draw_quad.dst_pos = glyph.position + (bounds_scaled.p0) * target_scale
draw_quad.dst_scale = (glyph_scale) * target_scale
// UV Coordinates for sampling the atlas
draw_quad.src_scale = (glyph_scale)
draw_quad.src_pos = (glyph.region_pos)
to_target_space( & draw_quad.src_pos, & draw_quad.src_scale, atlas_size )
}
// Compute pass 3/3: oversized glyphs are rendered to the glyph buffer and blitted straight to the target.
when ENABLE_OVERSIZED_GLYPHS do for id, index in oversized
{
glyph_padding := vec2(glyph_buffer.draw_padding)
glyph := & glyph_pack[id]
bounds := shape.bounds[id]
bounds_scaled := mul(bounds, font_scale)
bounds_size_scaled := size(bounds_scaled)
f32_allocated_x := cast(f32) glyph_buffer.allocated_x
// Resolve how much space this glyph will allocate in the buffer
buffer_size := (bounds_size_scaled + glyph_padding) * glyph.over_sample
// Allocate a glyph glyph render target region (FBO)
to_allocate_x := buffer_size.x + 2.0
// Unlike the to_cache loop, allocated_x is advanced before the flush test; the test still uses the pre-increment snapshot f32_allocated_x.
glyph_buffer.allocated_x += i32(to_allocate_x)
// If allocation would exceed buffer's bounds the buffer must be flush before this glyph can be rendered.
glyph.flush_glyph_buffer = i32(f32_allocated_x + to_allocate_x) >= i32(glyph_buffer_size.x)
glyph.buffer_x = f32_allocated_x * f32( i32( ! glyph.flush_glyph_buffer ) )
// Quad to for drawing atlas slot to target
draw_quad := & glyph.draw_quad
// Target position (draw_list's target image)
draw_quad.dst_pos = glyph.position + (bounds_scaled.p0 - glyph_padding) * target_scale
draw_quad.dst_scale = (bounds_size_scaled + glyph_padding) * target_scale
// The glyph buffer space transform for generate_glyph_pass_draw_list
draw_transform := & glyph.draw_transform
draw_transform.scale = font_scale * glyph.over_sample
// NOTE(review): this uses atlas.glyph_padding while the to_cache loop uses glyph_buffer.draw_padding for the same offset — confirm which is intended.
draw_transform.pos = -1 * bounds.p0 * draw_transform.scale + vec2(atlas.glyph_padding)
draw_transform.pos.x += glyph.buffer_x
to_glyph_buffer_space( & draw_transform.pos, & draw_transform.scale, glyph_buffer_size )
draw_quad.src_pos = Vec2 { glyph.buffer_x, 0 }
draw_quad.src_scale = bounds_size_scaled * glyph.over_sample + glyph_padding
to_target_space( & draw_quad.src_pos, & draw_quad.src_scale, glyph_buffer_size )
}
profile_end()
profile_begin("gen oversized glyphs draw_list")
when ENABLE_OVERSIZED_GLYPHS do if len(oversized) > 0
{
when ENABLE_DRAW_TYPE_VISUALIZATION {
colour.r = 1.0
colour.g = 1.0
colour.b = 0.0
}
// Fetch every outline up front; shape.glyph is indexed by vis_id, not by pack id.
for pack_id, index in oversized {
vis_id := shape.visible[pack_id]
error : Allocator_Error
glyph_pack[pack_id].shape, error = parser_get_glyph_shape(entry.parser_info, shape.glyph[vis_id])
assert(error == .None)
assert(glyph_pack[pack_id].shape != nil)
}
for id, index in oversized
{
glyph := & glyph_pack[id]
bounds := shape.bounds[id]
if glyph.flush_glyph_buffer do flush_glyph_buffer_draw_list(draw_list,
& glyph_buffer.draw_list,
& glyph_buffer.clear_draw_list,
& glyph_buffer.allocated_x
)
// Render the glyph mesh into the glyph buffer...
generate_glyph_pass_draw_list( draw_list, & glyph_buffer.shape_gen_scratch,
glyph_pack[id].shape,
entry.curve_quality,
bounds,
glyph_pack[id].draw_transform.pos,
glyph_pack[id].draw_transform.scale
)
// ...then blit it from the glyph buffer directly to the target (no atlas involved).
target_quad := & glyph_pack[id].draw_quad
draw_to_target : Draw_Call
{
draw_to_target.pass = .Target_Uncached
draw_to_target.colour = colour
draw_to_target.start_index = u32(len(draw_list.indices))
blit_quad( draw_list,
target_quad.dst_pos, target_quad.dst_pos + target_quad.dst_scale,
target_quad.src_pos, target_quad.src_pos + target_quad.src_scale )
draw_to_target.end_index = u32(len(draw_list.indices))
}
append( & draw_list.calls, draw_to_target )
}
flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.allocated_x)
// Release the outlines fetched at the start of this pass.
for pack_id, index in oversized {
assert(glyph_pack[pack_id].shape != nil)
parser_free_shape(entry.parser_info, glyph_pack[pack_id].shape)
}
}
profile_end()
// Emits one Target-pass draw call (a single blit quad sampling the atlas) per glyph id in sub_pack.
@(optimization_mode="favor_size")
generate_blit_from_atlas_draw_list :: #force_inline proc (draw_list : ^Draw_List, glyph_pack : #soa[]Glyph_Pack_Entry, sub_pack : []i32, colour : RGBAN )
{
profile(#procedure)
call := Draw_Call_Default
call.pass = .Target
call.colour = colour
for id, index in sub_pack
{
// profile("glyph")
call.start_index = u32(len(draw_list.indices))
quad := glyph_pack[id].draw_quad
blit_quad(draw_list,
quad.dst_pos, quad.dst_pos + quad.dst_scale,
quad.src_pos, quad.src_pos + quad.src_scale
)
call.end_index = u32(len(draw_list.indices))
append(& draw_list.calls, call)
}
}
profile_begin("to_cache: caching to atlas")
if len(to_cache) > 0
{
// Fetch every outline up front; shape.glyph is indexed by vis_id, not by pack id.
for pack_id, index in to_cache {
vis_id := shape.visible[pack_id]
error : Allocator_Error
glyph_pack[pack_id].shape, error = parser_get_glyph_shape(entry.parser_info, shape.glyph[vis_id])
assert(error == .None)
assert(glyph_pack[pack_id].shape != nil)
}
for id, index in to_cache
{
// profile("glyph")
glyph := & glyph_pack[id]
bounds := shape.bounds[id]
bounds_scaled := mul(bounds, font_scale)
bounds_size_scaled := size(bounds_scaled)
if glyph.flush_glyph_buffer do flush_glyph_buffer_draw_list( draw_list,
& glyph_buffer.draw_list,
& glyph_buffer.clear_draw_list,
& glyph_buffer.allocated_x
)
// Queue a quad covering the glyph's whole atlas slot into the clear list (region = .Ignore).
dst_region_pos := glyph.region_pos
dst_region_size := glyph.region_size
to_glyph_buffer_space( & dst_region_pos, & dst_region_size, atlas_size )
clear_target_region : Draw_Call
clear_target_region.pass = .Atlas
clear_target_region.region = .Ignore
clear_target_region.start_index = cast(u32) len(glyph_buffer.clear_draw_list.indices)
blit_quad( & glyph_buffer.clear_draw_list,
dst_region_pos, dst_region_pos + dst_region_size,
{ 1.0, 1.0 }, { 1.0, 1.0 }
)
clear_target_region.end_index = cast(u32) len(glyph_buffer.clear_draw_list.indices)
// Queue the glyph buffer -> atlas blit for this glyph.
dst_glyph_pos := glyph.region_pos
dst_glyph_size := bounds_size_scaled + atlas.glyph_padding
dst_glyph_size.x = max(dst_glyph_size.x, ceil(dst_glyph_size.x) * glyph_buffer.snap_glyph_width) // Note(Ed): Can (in specific cases, rare.) improve hinting
dst_glyph_size.y = max(dst_glyph_size.y, ceil(dst_glyph_size.y) * glyph_buffer.snap_glyph_height) // Note(Ed): Seems to improve hinting
to_glyph_buffer_space( & dst_glyph_pos, & dst_glyph_size, atlas_size )
src_position := Vec2 { glyph.buffer_x, 0 }
src_size := (bounds_size_scaled + atlas.glyph_padding) * glyph_buffer.over_sample
src_size.x = max(src_size.x, ceil(src_size.x) * glyph_buffer.snap_glyph_width) // Note(Ed): Can (in specific cases, rare.) improve hinting
src_size.y = max(src_size.y, ceil(src_size.y) * glyph_buffer.snap_glyph_height) // Note(Ed): Seems to improve hinting
to_target_space( & src_position, & src_size, glyph_buffer_size )
blit_to_atlas : Draw_Call
blit_to_atlas.pass = .Atlas
blit_to_atlas.region = .None
blit_to_atlas.start_index = cast(u32) len(glyph_buffer.draw_list.indices)
blit_quad( & glyph_buffer.draw_list,
dst_glyph_pos, dst_glyph_pos + dst_glyph_size,
src_position, src_position + src_size )
blit_to_atlas.end_index = cast(u32) len(glyph_buffer.draw_list.indices)
// Both calls stay in the glyph buffer's own lists until the next flush merges them into draw_list.
append( & glyph_buffer.clear_draw_list.calls, clear_target_region )
append( & glyph_buffer.draw_list.calls, blit_to_atlas )
// Render glyph to glyph render target (FBO)
generate_glyph_pass_draw_list( draw_list, & glyph_buffer.shape_gen_scratch,
glyph.shape,
entry.curve_quality,
bounds,
glyph.draw_transform.pos,
glyph.draw_transform.scale
)
}
flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.allocated_x)
// Release the outlines fetched at the start of this pass.
for pack_id, index in to_cache {
assert(glyph_pack[pack_id].shape != nil)
parser_free_shape(entry.parser_info, glyph_pack[pack_id].shape)
}
profile_begin("gen_cached_draw_list: to_cache")
when ENABLE_DRAW_TYPE_VISUALIZATION {
colour.r = 1.0
colour.g = 0.0
colour.b = 0.0
}
// The freshly cached glyphs are now in the atlas, so they are drawn the same way as cached ones.
generate_blit_from_atlas_draw_list( draw_list, glyph_pack[:], to_cache, colour )
profile_end()
}
profile_end()
profile_begin("gen_cached_draw_list: cached")
when ENABLE_DRAW_TYPE_VISUALIZATION {
colour.r = 0.5
colour.g = 0.5
colour.b = 0.5
}
generate_blit_from_atlas_draw_list( draw_list, glyph_pack[:], cached, colour )
profile_end()
}
// Flushes the glyph buffer's pending draw lists into the main draw list.
// The clear list is merged first, then the blit list; both are emptied afterwards.
// A Glyph-pass call with an empty index range and clear_before_draw set is then appended,
// and the glyph buffer's horizontal allocation cursor is reset to 0.
flush_glyph_buffer_draw_list :: proc( #no_alias draw_list, glyph_buffer_draw_list, glyph_buffer_clear_draw_list : ^Draw_List, allocated_x : ^i32 )
{
	profile(#procedure)

	// Merge order matters: region clears must precede the blits into those regions.
	merge_draw_list( draw_list, glyph_buffer_clear_draw_list )
	merge_draw_list( draw_list, glyph_buffer_draw_list )

	clear_draw_list( glyph_buffer_draw_list )
	clear_draw_list( glyph_buffer_clear_draw_list )

	clear_glyph_target := Draw_Call_Default
	clear_glyph_target.pass              = .Glyph
	clear_glyph_target.clear_before_draw = true
	clear_glyph_target.start_index       = 0
	clear_glyph_target.end_index         = 0
	append( & draw_list.calls, clear_glyph_target )

	allocated_x^ = 0
}
// Empties all three buffers of a draw list (lengths are reset via the builtin clear).
@(optimization_mode="favor_size")
clear_draw_list :: #force_inline proc ( draw_list : ^Draw_List )
{
	clear( & draw_list.vertices )
	clear( & draw_list.indices )
	clear( & draw_list.calls )
}
// Helper used by flush_glyph_buffer_draw_list. Appends all the content from the src draw list to the destination.
// * Vertices are copied as-is.
// * Indices are rebased by the number of vertices dst held before the merge.
// * Draw calls have their start/end index rebased by the number of indices dst held before the merge.
// Every append is checked: an allocation failure trips an assert instead of silently dropping geometry.
@(optimization_mode="favor_size")
merge_draw_list :: proc ( #no_alias dst, src : ^Draw_List ) #no_bounds_check
{
	profile(#procedure)
	error : Allocator_Error

	// Capture the offsets before anything is appended; they are what src's indices/calls must be shifted by.
	v_offset := cast(u32) len( dst.vertices )
	_, error = append( & dst.vertices, ..src.vertices[:] )
	assert( error == .None )

	i_offset := cast(u32) len(dst.indices)
	for src_index in src.indices {
		_, error = append( & dst.indices, src_index + v_offset )
		assert( error == .None )
	}

	for src_call in src.calls {
		call := src_call
		call.start_index += i_offset
		call.end_index   += i_offset
		// The append result is captured here so the assert checks this append rather than a stale error value.
		_, error = append( & dst.calls, call )
		assert( error == .None )
	}
}
// Naive implementation that merges consecutive draw calls in place.
// Two neighbouring calls (a, b) are merged when they share pass, region and colour, b starts exactly
// where a ends, and b does not request a clear before drawing.
// It's intended for optimization passes to occur on a per-layer basis:
// * call_offset : index of the first draw call of the layer; calls before it are left untouched.
// The list is shrunk to drop the calls that were folded into their predecessor.
optimize_draw_list :: proc (draw_list: ^Draw_List, call_offset: int) #no_bounds_check
{
	profile(#procedure)
	assert(draw_list != nil)

	// Nothing to optimize for an empty list or an empty layer. Without this guard the final resize
	// would GROW the list to call_offset + 1 and introduce a spurious zeroed draw call.
	if call_offset >= len(draw_list.calls) do return

	can_merge_draw_calls :: #force_inline proc "contextless" ( a, b : ^Draw_Call ) -> bool {
		result := \
		a.pass      == b.pass        &&
		a.end_index == b.start_index &&
		a.region    == b.region      &&
		a.colour    == b.colour      &&
		! b.clear_before_draw
		return result
	}

	// write_index: last call kept so far; read_index: next candidate to fold into it or keep.
	write_index := call_offset
	for read_index := call_offset + 1; read_index < len(draw_list.calls); read_index += 1
	{
		draw_current := & draw_list.calls[write_index]
		draw_next    := & draw_list.calls[read_index]

		if can_merge_draw_calls(draw_current, draw_next) {
			draw_current.end_index = draw_next.end_index
		}
		else {
			// Move to the next write position and copy the draw call
			write_index += 1
			if write_index != read_index {
				draw_list.calls[write_index] = (draw_next^)
			}
		}
	}

	resize( & draw_list.calls, write_index + 1)
}

View File

@@ -0,0 +1,3 @@
package vefontcache
// TODO(Ed): Bring in KT1CX and see how it performs.

209
code2/vefontcache/misc.odin Normal file
View File

@@ -0,0 +1,209 @@
package vefontcache
/*
Didn't want to splinter this into more files..
Just a bunch of utilities.
*/
import "core:simd"
import core_log "core:log"
// Returns a copy of the last element of a dynamic array. The array must not be empty.
peek_array :: #force_inline proc "contextless" ( self : [dynamic]$Type ) -> Type {
	last := len(self) - 1
	return self[last]
}
// Overwrites the allocator stored in a dynamic array's raw header; the array's data and length are untouched.
reload_array :: #force_inline proc( self : ^[dynamic]$Type, allocator : Allocator ) {
	header := cast(^Raw_Dynamic_Array) self
	header.allocator = allocator
}
// Overwrites the allocator stored in an #soa dynamic array's footer; the array's data and length are untouched.
reload_array_soa :: #force_inline proc( self : ^#soa[dynamic]$Type, allocator : Allocator ) {
	footer := raw_soa_footer(self)
	footer.allocator = allocator
}
// Overwrites the allocator stored in a map's raw header; the map's entries are untouched.
reload_map :: #force_inline proc( self : ^map [$KeyType] $EntryType, allocator : Allocator ) {
	header := cast(^Raw_Map) self
	header.allocator = allocator
}
// Views the memory of a single value as a byte slice of length size_of(Type). No copy is made.
to_bytes :: #force_inline proc "contextless" ( typed_data : ^$Type ) -> []byte {
	first_byte := transmute(^byte) typed_data
	return slice_ptr( first_byte, size_of(Type) )
}
// Folds `bytes` into the running hash stored at `hash`, in order:
//   hash = (hash << 8) + hash + byte
// The caller provides the seed through the initial value of hash^.
@(optimization_mode="favor_size")
djb8_hash :: #force_inline proc "contextless" ( hash : ^$Type, bytes : []byte ) {
	for value in bytes {
		hash^ = ((hash^ << 8) + hash^) + Type(value)
	}
}
// Fixed-size array aliases used throughout the package.
RGBA8 :: [4]u8 // 4 x u8 colour
RGBAN :: [4]f32 // 4 x f32 colour (Draw_Call_Default uses 1.0 per channel)
Vec2 :: [2]f32 // 2D f32 vector
Vec2i :: [2]i32 // 2D i32 vector
Vec2_64 :: [2]f64 // 2D f64 vector
// A 2D translate + scale pair (no rotation).
Transform :: struct {
pos : Vec2,
scale : Vec2,
}
// A 2D range described by two corner points; size_range2 computes its extent as p1 - p0.
Range2 :: struct {
p0, p1 : Vec2,
}
mul_range2_vec2 :: #force_inline proc "contextless" ( range : Range2, v : Vec2 ) -> Range2 { return { range.p0 * v, range.p1 * v } }
size_range2 :: #force_inline proc "contextless" ( range : Range2 ) -> Vec2 { return range.p1 - range.p0 }
vec2_from_scalar :: #force_inline proc "contextless" ( scalar : f32 ) -> Vec2 { return { scalar, scalar }}
vec2_64_from_vec2 :: #force_inline proc "contextless" ( v2 : Vec2 ) -> Vec2_64 { return { f64(v2.x), f64(v2.y) }}
vec2_from_vec2i :: #force_inline proc "contextless" ( v2i : Vec2i ) -> Vec2 { return { f32(v2i.x), f32(v2i.y) }}
vec2i_from_vec2 :: #force_inline proc "contextless" ( v2 : Vec2 ) -> Vec2i { return { i32(v2.x), i32(v2.y) }}
// Component-wise ceil of a Vec2.
@(require_results)
ceil_vec2 :: proc "contextless" ( v : Vec2 ) -> Vec2 {
	rounded_up : Vec2
	rounded_up.x = ceil_f32(v.x)
	rounded_up.y = ceil_f32(v.y)
	return rounded_up
}
// Component-wise floor of a Vec2.
@(require_results)
floor_vec2 :: proc "contextless" ( v : Vec2 ) -> Vec2 {
	rounded_down : Vec2
	rounded_down.x = floor_f32(v.x)
	rounded_down.y = floor_f32(v.y)
	return rounded_down
}
// This buffer was used below exclusively to prevent any allocator recursion when verbose logging from allocators.
// It would limit a single line to a 4k buffer.
// NOTE: the buffer and the arena setup inside log/logf are currently commented out,
// so logging uses whatever allocators the caller's context provides.
// Logger_Allocator_Buffer : [4 * Kilobyte]u8

// Forwards `msg` to core:log at the given level (default .Info),
// passing the caller's source location through so the log entry points at the call site.
log :: proc( msg : string, level := core_log.Level.Info, loc := #caller_location ) {
// temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:])
// context.allocator = arena_allocator(& temp_arena)
// context.temp_allocator = arena_allocator(& temp_arena)
core_log.log( level, msg, location = loc )
}
// Formatted variant of `log`: forwards the format string and its arguments to core:log's logf
// at the given level (default .Info), passing the caller's source location through.
// The arena-based allocator override is commented out here as well.
logf :: proc( fmt : string, args : ..any, level := core_log.Level.Info, loc := #caller_location ) {
// temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:])
// context.allocator = arena_allocator(& temp_arena)
// context.temp_allocator = arena_allocator(& temp_arena)
core_log.logf( level, fmt, ..args, location = loc )
}
// Rewrites `position` and `scale` in place relative to a buffer of dimensions `size`:
//   position = position / size * 2 - 1
//   scale    = scale    / size * 2
// `size` is not checked for zero components.
@(optimization_mode="favor_size")
to_glyph_buffer_space :: #force_inline proc "contextless" ( #no_alias position, scale : ^Vec2, size : Vec2 )
{
	inv_size : Vec2 = 1.0 / size
	(position^) = (position^) * inv_size * 2.0 - 1.0
	(scale^)    = (scale^)    * inv_size * 2.0
}
// Divides `position` and `scale` in place by `size` (implemented as a multiply by 1 / size).
// `size` is not checked for zero components.
@(optimization_mode="favor_size")
to_target_space :: #force_inline proc "contextless" ( #no_alias position, scale : ^Vec2, size : Vec2 )
{
	inv_size : Vec2 = 1.0 / size
	(position^) = (position^) * inv_size
	(scale^)    = (scale^)    * inv_size
}
USE_MANUAL_SIMD_FOR_BEZIER_OPS :: true
when ! USE_MANUAL_SIMD_FOR_BEZIER_OPS
{
// Evaluates a quadratic bezier (start p0, control p1, end p2) at parameter `alpha`
// using the Bernstein weights (1-a)^2, 2(1-a)a and a^2.
// ve_fontcache_eval_bezier (quadratic)
eval_point_on_bezier3 :: #force_inline proc "contextless" ( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2
{
	w_start   := (1 - alpha) * (1 - alpha)
	w_control := 2.0 * (1 - alpha) * alpha
	w_end     := alpha * alpha

	blended := p0 * w_start + p1 * w_control + p2 * w_end
	return { f32(blended.x), f32(blended.y) }
}
// Evaluates a cubic bezier (start p0, controls p1 and p2, end p3) at parameter `alpha`
// using the Bernstein weights (1-a)^3, 3(1-a)^2 a, 3(1-a)a^2 and a^3.
// ve_fontcache_eval_bezier (cubic)
eval_point_on_bezier4 :: #force_inline proc "contextless" ( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2
{
	w_start := (1 - alpha) * (1 - alpha) * (1 - alpha)
	w_ctl_a := 3 * (1 - alpha) * (1 - alpha) * alpha
	w_ctl_b := 3 * (1 - alpha) * alpha * alpha
	w_end   := alpha * alpha * alpha

	blended := p0 * w_start + p1 * w_ctl_a + p2 * w_ctl_b + p3 * w_end
	return { f32(blended.x), f32(blended.y) }
}
}
else
{
// A Vec2 carried in a 4-lane f32 SIMD register; lanes 2 and 3 are padding.
Vec2_SIMD :: simd.f32x4
// Loads a Vec2 into lanes 0 and 1, zeroing lanes 2 and 3.
@(optimization_mode="favor_size")
vec2_to_simd :: #force_inline proc "contextless" (v: Vec2) -> Vec2_SIMD {
	lanes := Vec2_SIMD{ v.x, v.y, 0, 0 }
	return lanes
}
// Extracts lanes 0 and 1 of a Vec2_SIMD back into a Vec2; lanes 2 and 3 are ignored.
@(optimization_mode="favor_size")
simd_to_vec2 :: #force_inline proc "contextless" (v: Vec2_SIMD) -> Vec2 {
	x := simd.extract(v, 0)
	y := simd.extract(v, 1)
	return Vec2{ x, y }
}
// SIMD variant: evaluates a quadratic bezier (start p0, control p1, end p2) at parameter `alpha`.
// The three Bernstein weights are packed into one register and each is broadcast across
// all lanes with a swizzle before being multiplied with its point.
@(optimization_mode="favor_size")
eval_point_on_bezier3 :: #force_inline proc "contextless" (p0, p1, p2: Vec2, alpha: f32) -> Vec2
{
	inv_alpha := 1.0 - alpha
	w_start   := inv_alpha * inv_alpha
	w_control := 2.0 * inv_alpha * alpha
	w_end     := alpha * alpha
	weights   := Vec2_SIMD{ w_start, w_control, w_end, 0 }

	term_start   := simd.mul( vec2_to_simd(p0), simd.swizzle( weights, 0, 0, 0, 0 ) )
	term_control := simd.mul( vec2_to_simd(p1), simd.swizzle( weights, 1, 1, 1, 1 ) )
	term_end     := simd.mul( vec2_to_simd(p2), simd.swizzle( weights, 2, 2, 2, 2 ) )

	// Same summation order as before: (start + control) + end.
	blended := simd.add( simd.add( term_start, term_control ), term_end )
	return simd_to_vec2( blended )
}
// SIMD variant: evaluates a cubic bezier (start p0, controls p1 and p2, end p3) at parameter `alpha`.
// The four Bernstein weights fill one register; each is broadcast with a swizzle and
// multiplied with its point, then the four terms are summed pairwise.
@(optimization_mode="favor_size")
eval_point_on_bezier4 :: #force_inline proc "contextless" (p0, p1, p2, p3: Vec2, alpha: f32) -> Vec2
{
	inv_alpha := 1.0 - alpha
	w_start   := inv_alpha * inv_alpha * inv_alpha
	w_ctl_a   := 3 * inv_alpha * inv_alpha * alpha
	w_ctl_b   := 3 * inv_alpha * alpha * alpha
	w_end     := alpha * alpha * alpha
	weights   := Vec2_SIMD { w_start, w_ctl_a, w_ctl_b, w_end }

	term_start := simd.mul( vec2_to_simd(p0), simd.swizzle( weights, 0, 0, 0, 0 ) )
	term_ctl_a := simd.mul( vec2_to_simd(p1), simd.swizzle( weights, 1, 1, 1, 1 ) )
	term_ctl_b := simd.mul( vec2_to_simd(p2), simd.swizzle( weights, 2, 2, 2, 2 ) )
	term_end   := simd.mul( vec2_to_simd(p3), simd.swizzle( weights, 3, 3, 3, 3 ) )

	// Same summation order as before: (start + ctl_a) + (ctl_b + end).
	first_half  := simd.add( term_start, term_ctl_a )
	second_half := simd.add( term_ctl_b, term_end )
	return simd_to_vec2( simd.add( first_half, second_half ) )
}
}

View File

@@ -0,0 +1,196 @@
package vefontcache
/*
Notes:
This is a minimal wrapper I originally did in case a font parser other than stb_truetype is introduced in the future.
Otherwise, it's essentially 1:1 with it.
Freetype isn't really supported and it's not a high priority.
~~Freetype will do memory allocations and has an interface the user can implement.~~
~~That interface is not exposed from this parser but could be added to parser_init.~~
STB_Truetype:
* Added ability to set the stb_truetype allocator for STBTT_MALLOC and STBTT_FREE.
* Changed procedure signatures to pass the font_info struct by immutable ptr (#by_ptr)
when the C equivalent has their parameter as `const*`.
*/
import "core:c"
import stbtt "thirdparty:stb/truetype"
// import freetype "thirdparty:freetype"
// Selects which font parsing backend a Parser_Context / Parser_Font_Info refers to.
// Every parser procedure visible in this file calls stb_truetype regardless of this value.
Parser_Kind :: enum u32 {
STB_TrueType,
Freetype, // Currently not implemented.
}
// Per-font parser state, filled in by parser_load_font.
Parser_Font_Info :: struct {
// Label passed to parser_load_font.
label : string,
// Copied from the Parser_Context that loaded the font.
kind : Parser_Kind,
// stb_truetype's font state, initialized by stbtt.InitFont.
stbtt_info : stbtt.fontinfo,
// freetype_info : freetype.Face
// The font file bytes handed to parser_load_font. The slice is stored, not copied;
// presumably it must outlive this struct since stb_truetype reads from it — confirm.
data : []byte,
}
// Kind of an outline vertex. Values are None = 0, Move = 1, Line = 2, Curve = 3, Cubic = 4.
// NOTE(review): these look intended to match stb_truetype's vertex type constants — confirm against the binding.
Glyph_Vert_Type :: enum u8 {
None,
Move = 1,
Line,
Curve,
Cubic,
}
// Based directly off of stb_truetype's vertex.
// parser_get_glyph_shape exposes the stbtt.vertex array it receives as a [dynamic]Parser_Glyph_Vertex
// without copying, so this struct's layout has to stay identical to stbtt.vertex.
Parser_Glyph_Vertex :: struct {
// Vertex position.
x, y : i16,
// Control point data (presumably first / second control point for Curve / Cubic vertices — confirm).
contour_x0, contour_y0 : i16,
contour_x1, contour_y1 : i16,
type : Glyph_Vert_Type,
padding : u8,
}
// A shape can be a dynamic array (freetype) or an opaque set of data handled by stb_truetype.
// With the stb_truetype path the array header wraps memory owned by stb_truetype:
// parser_get_glyph_shape sets its allocator to nil_allocator() and parser_free_shape releases it via stbtt.FreeShape.
Parser_Glyph_Shape :: [dynamic]Parser_Glyph_Vertex
// Parser-wide state.
Parser_Context :: struct {
// Allocator handed to stb_truetype. parser_init / parser_reload register the ADDRESS of this field
// with stbtt.SetAllocator, so the context must not move without calling parser_reload again.
lib_backing : Allocator,
kind : Parser_Kind,
// ft_library : freetype.Library,
}
// Allocation callback registered with stb_truetype (see parser_init / parser_reload).
// `allocator_data` is the ^Allocator registered alongside this proc; the request is forwarded
// to that allocator's procedure with the stb allocator type cast directly to Allocator_Mode.
// Any allocator error trips the assert. Returns the allocated memory for .Alloc and .Resize
// requests, and nil for every other request type. `flags` is not used.
parser_stbtt_allocator_proc :: proc(
	allocator_data : rawptr,
	type           : stbtt.zpl_allocator_type,
	size           : c.ssize_t,
	alignment      : c.ssize_t,
	old_memory     : rawptr,
	old_size       : c.ssize_t,
	flags          : c.ulonglong
) -> rawptr
{
	backing := transmute(^Allocator) allocator_data
	memory, alloc_error := backing.procedure( backing.data, cast(Allocator_Mode) type, cast(int) size, cast(int) alignment, old_memory, cast(int) old_size )
	assert(alloc_error == .None)

	// Only allocation-producing requests hand memory back to stb_truetype.
	if type != .Alloc && type != .Resize do return nil

	raw := transmute(Raw_Slice) memory
	// assert(raw.len > 0, "Allocation is 0 bytes?")
	return raw.data
}
// Initializes the parser context and points stb_truetype's allocator at `ctx.lib_backing`.
// The address of `ctx.lib_backing` is what gets registered, so `ctx` has to stay at a stable address.
parser_init :: proc( ctx : ^Parser_Context, kind : Parser_Kind, allocator := context.allocator )
{
	ctx.lib_backing = allocator
	ctx.kind        = kind
	stbtt.SetAllocator( stbtt.zpl_allocator { parser_stbtt_allocator_proc, & ctx.lib_backing } )
}
// Re-registers the allocator with stb_truetype: stores the new allocator in the context
// and hands the address of `ctx.lib_backing` plus the callback to stbtt.SetAllocator again.
parser_reload :: proc( ctx : ^Parser_Context, allocator := context.allocator) {
	ctx.lib_backing = allocator
	stbtt.SetAllocator( stbtt.zpl_allocator { parser_stbtt_allocator_proc, & ctx.lib_backing } )
}
// Counterpart to parser_init. Currently a no-op.
parser_shutdown :: proc( ctx : ^Parser_Context ) {
// Note: Not necessary for stb_truetype
}
// Initializes stb_truetype font state from `data` (font offset 0) and records the label,
// the data slice (stored, not copied) and the context's parser kind.
// `error` is true when stbtt.InitFont reports failure; `font` is still returned populated in that case.
parser_load_font :: proc( ctx : ^Parser_Context, label : string, data : []byte ) -> (font : Parser_Font_Info, error : b32)
{
	init_ok := stbtt.InitFont( & font.stbtt_info, raw_data(data), 0 )
	error    = ! init_ok

	font.label = label
	font.data  = data
	font.kind  = ctx.kind
	return font, error
}
// Counterpart to parser_load_font. Currently a no-op: nothing is released for the stb_truetype path.
parser_unload_font :: proc( font : ^Parser_Font_Info )
{
// case .STB_TrueType:
// Do Nothing
}
// Looks up the font's glyph index for `codepoint` via stbtt.FindGlyphIndex.
// The result is transmuted to Glyph, so Glyph must have the same size as the binding's return type.
parser_find_glyph_index :: #force_inline proc "contextless" ( font : Parser_Font_Info, codepoint : rune ) -> (glyph_index : Glyph)
{
	return transmute(Glyph) stbtt.FindGlyphIndex( font.stbtt_info, codepoint )
}
// Releases a shape obtained from parser_get_glyph_shape by handing its data pointer back to stbtt.FreeShape.
parser_free_shape :: #force_inline proc( font : Parser_Font_Info, shape : Parser_Glyph_Shape )
{
	// Take a local copy so the array header is addressable.
	local_shape := shape
	header      := transmute( ^Raw_Dynamic_Array) & local_shape
	stbtt.FreeShape( font.stbtt_info, transmute( [^]stbtt.vertex) header.data )
}
// Horizontal metrics for `codepoint` as reported by stbtt.GetCodepointHMetrics:
// the advance and the left side bearing (unscaled font units — callers multiply by the font scale).
parser_get_codepoint_horizontal_metrics :: #force_inline proc "contextless" ( font : Parser_Font_Info, codepoint : rune ) -> ( advance, to_left_side_glyph : i32 )
{
	adv, left_bearing : i32
	stbtt.GetCodepointHMetrics( font.stbtt_info, codepoint, & adv, & left_bearing )
	return adv, left_bearing
}
// Kerning adjustment between `prev_codepoint` and `codepoint` as reported by stbtt.GetCodepointKernAdvance.
parser_get_codepoint_kern_advance :: #force_inline proc "contextless" ( font : Parser_Font_Info, prev_codepoint, codepoint : rune ) -> i32
{
	return stbtt.GetCodepointKernAdvance( font.stbtt_info, prev_codepoint, codepoint )
}
// Font-wide vertical metrics (ascent, descent, line gap) as reported by stbtt.GetFontVMetrics.
parser_get_font_vertical_metrics :: #force_inline proc "contextless" ( font : Parser_Font_Info ) -> (ascent, descent, line_gap : i32 )
{
	up, down, gap : i32
	stbtt.GetFontVMetrics( font.stbtt_info, & up, & down, & gap )
	return up, down, gap
}
// Returns the glyph's bounding box as a Range2 (p0 = {x0, y0}, p1 = {x1, y1}) converted to f32.
// Values are whatever stbtt.GetGlyphBox writes; callers scale them by the font scale.
// The success flag of GetGlyphBox is deliberately discarded: the coordinates start zeroed,
// so a glyph for which stb_truetype writes nothing yields an all-zero range
// (assumes GetGlyphBox leaves its outputs untouched on failure — TODO confirm).
parser_get_bounds :: #force_inline proc "contextless" ( font : Parser_Font_Info, glyph_index : Glyph ) -> (bounds : Range2)
{
	// profile(#procedure)
	x0, y0, x1, y1 : i32
	_ = stbtt.GetGlyphBox( font.stbtt_info, i32(glyph_index), & x0, & y0, & x1, & y1 )
	bounds = { vec2(Vec2i{ x0, y0 }), vec2(Vec2i{ x1, y1 }) }
	return
}
// Fetches the outline of `glyph_index` from stb_truetype and exposes it as a Parser_Glyph_Shape
// without copying: the dynamic array header is pointed at stb_truetype's vertex array, with
// len == cap == vertex count and a nil allocator.
// Release the result with parser_free_shape. `error` is always .None.
parser_get_glyph_shape :: #force_inline proc ( font : Parser_Font_Info, glyph_index : Glyph ) -> (shape : Parser_Glyph_Shape, error : Allocator_Error)
{
	vertices : [^]stbtt.vertex
	vertex_count := int( stbtt.GetGlyphShape( font.stbtt_info, cast(i32) glyph_index, & vertices ) )

	header := transmute( ^Raw_Dynamic_Array) & shape
	header.data      = vertices
	header.len       = vertex_count
	header.cap       = vertex_count
	header.allocator = nil_allocator()

	return shape, Allocator_Error.None
}
// Reports whether stb_truetype considers the glyph empty (stbtt.IsGlyphEmpty).
parser_is_glyph_empty :: #force_inline proc "contextless" ( font : Parser_Font_Info, glyph_index : Glyph ) -> b32
{
	is_empty := stbtt.IsGlyphEmpty( font.stbtt_info, cast(c.int) glyph_index )
	return is_empty
}
// Resolves the font scale for `size`. The sign of `size` selects the scaling mode:
//   size > 0  : scale that maps the em to `size` pixels
//   otherwise : scale for a pixel height of `-size`
parser_scale :: #force_inline proc "contextless" ( font : Parser_Font_Info, size : f32 ) -> f32
{
	// profile(#procedure)
	if size > 0.0 {
		return parser_scale_for_mapping_em_to_pixels( font, size )
	}
	return parser_scale_for_pixel_height( font, -size )
}
// Thin wrapper over stbtt.ScaleForPixelHeight.
parser_scale_for_pixel_height :: #force_inline proc "contextless" ( font : Parser_Font_Info, size : f32 ) -> f32
{
	scale := stbtt.ScaleForPixelHeight( font.stbtt_info, size )
	return scale
}
// Thin wrapper over stbtt.ScaleForMappingEmToPixels.
parser_scale_for_mapping_em_to_pixels :: #force_inline proc "contextless" ( font : Parser_Font_Info, size : f32 ) -> f32
{
	scale := stbtt.ScaleForMappingEmToPixels( font.stbtt_info, size )
	return scale
}

View File

@@ -0,0 +1,149 @@
package vefontcache
// Package-level aliases: each import is followed by the names re-exported from it,
// so the rest of the package can use them unqualified.
import "base:builtin"
resize_soa_non_zero :: non_zero_resize_soa
import "base:runtime"
// Raw container headers, used to patch allocators and wrap foreign memory.
Raw_Dynamic_Array :: runtime.Raw_Dynamic_Array
Raw_Map :: runtime.Raw_Map
Raw_Slice :: runtime.Raw_Slice
raw_soa_footer :: runtime.raw_soa_footer
nil_allocator :: runtime.nil_allocator
import "core:hash"
ginger16 :: hash.ginger16
import "core:math"
// Per-type ceil / floor procedures (the `ceil` and `floor` overload groups below cover the same set).
ceil_f16 :: math.ceil_f16
ceil_f16le :: math.ceil_f16le
ceil_f16be :: math.ceil_f16be
ceil_f32 :: math.ceil_f32
ceil_f32le :: math.ceil_f32le
ceil_f32be :: math.ceil_f32be
ceil_f64 :: math.ceil_f64
ceil_f64le :: math.ceil_f64le
ceil_f64be :: math.ceil_f64be
floor_f16 :: math.floor_f16
floor_f16le :: math.floor_f16le
floor_f16be :: math.floor_f16be
floor_f32 :: math.floor_f32
floor_f32le :: math.floor_f32le
floor_f32be :: math.floor_f32be
floor_f64 :: math.floor_f64
floor_f64le :: math.floor_f64le
floor_f64be :: math.floor_f64be
import "core:math/linalg"
import "core:mem"
// Memory / allocator names.
Kilobyte :: mem.Kilobyte
slice_ptr :: mem.slice_ptr
Allocator :: mem.Allocator
Allocator_Error :: mem.Allocator_Error
Allocator_Mode :: mem.Allocator_Mode
Arena :: mem.Arena
arena_allocator :: mem.arena_allocator
arena_init :: mem.arena_init
import "core:slice"
import "core:unicode"
//#region("Proc overload mappings")
// Explicit overload groups. Declaring these names at package scope means calls such as
// append / clear / make / resize / max inside this package resolve to the groups below,
// i.e. only the listed overloads are available.

// Dynamic array / string append.
append :: proc {
append_elem,
append_elems,
append_elem_string,
}
append_soa :: proc {
append_soa_elem,
}
// Scalar ceil for every float type, plus the component-wise Vec2 variant from this package.
ceil :: proc {
math.ceil_f16,
math.ceil_f16le,
math.ceil_f16be,
math.ceil_f32,
math.ceil_f32le,
math.ceil_f32be,
math.ceil_f64,
math.ceil_f64le,
math.ceil_f64be,
ceil_vec2,
}
clear :: proc {
builtin.clear_dynamic_array,
builtin.clear_map,
}
// Scalar floor for every float type, plus the component-wise Vec2 variant from this package.
floor :: proc {
math.floor_f16,
math.floor_f16le,
math.floor_f16be,
math.floor_f32,
math.floor_f32le,
math.floor_f32be,
math.floor_f64,
math.floor_f64le,
math.floor_f64be,
floor_vec2,
}
fill :: proc {
slice.fill,
}
// Only linalg's max_single / max_double are reachable through this name.
max :: proc {
linalg.max_single,
linalg.max_double,
}
make :: proc {
builtin.make_dynamic_array,
builtin.make_dynamic_array_len,
builtin.make_dynamic_array_len_cap,
builtin.make_slice,
builtin.make_map,
builtin.make_map_cap,
}
make_soa :: proc {
builtin.make_soa_dynamic_array_len_cap,
builtin.make_soa_slice,
}
// Range2 * Vec2 (see mul_range2_vec2).
mul :: proc {
mul_range2_vec2,
}
// Last element of a dynamic array (see peek_array).
peek :: proc {
peek_array,
}
resize :: proc {
builtin.resize_dynamic_array,
}
// f32 only.
round :: proc {
math.round_f32,
}
// Extent of a Range2 (see size_range2).
size :: proc {
size_range2,
}
// Vec2 constructors: from a scalar or from a Vec2i.
vec2 :: proc {
vec2_from_scalar,
vec2_from_vec2i,
}
vec2i :: proc {
vec2i_from_vec2,
}
vec2_64 :: proc {
vec2_64_from_vec2,
}
//#endregion("Proc overload mappings")

View File

@@ -0,0 +1,17 @@
package vefontcache
// Add profiling hookup here
// import ""
// Scope-profiling hook. The body is an empty stub: hook a profiler's "begin zone" call in here.
// `deferred_none = profile_end` makes a call to profile() also schedule profile_end() for the end of the calling scope.
// `disabled = DISABLE_PROFILING` compiles calls out when that constant (declared elsewhere in the package) is true.
@(deferred_none = profile_end, disabled = DISABLE_PROFILING)
profile :: #force_inline proc "contextless" ( name : string, loc := #caller_location ) {
}
// Manual "begin zone" hook; pair each call with profile_end(). Empty stub.
// Calls are compiled out when DISABLE_PROFILING (declared elsewhere in the package) is true.
@(disabled = DISABLE_PROFILING)
profile_begin :: #force_inline proc "contextless" ( name : string, loc := #caller_location ) {
}
// "End zone" hook: called explicitly after profile_begin(), and automatically at scope exit
// for zones opened through profile(). Empty stub.
// Calls are compiled out when DISABLE_PROFILING (declared elsewhere in the package) is true.
@(disabled = DISABLE_PROFILING)
profile_end :: #force_inline proc "contextless" () {
}

View File

@@ -0,0 +1,486 @@
package vefontcache
/*
Note(Ed): The only reason I didn't directly use harfbuzz is hamza:
https://github.com/saidwho12/hamza
which seems to be under active development as an alternative.
*/
import "core:c"
import "thirdparty:harfbuzz"
// Key type of the shape cache's LRU. shaper_shape_text_cached derives it by hashing
// the font id, the pixel size and the text bytes with djb8_hash.
Shape_Key :: u32
/* A text whose codepoints have had their relevant glyphs and
associated data resolved for processing in a draw list generation stage.
Traditionally a shape only refers to resolving which glyph and
its position should be used for rendering.
For this library's case it also resolves any content that does not have to be done
on a per-frame basis for draw list generation:
* atlas lru codes
* glyph bounds and scale
* atlas region the glyph is associated with.
Ideally the user should resolve this shape once and cache/store it on their side.
They have the best ability to avoid costly lookups.
*/
Shaped_Text :: struct #packed {
// One entry per shaped glyph, visible or not (the shapers skip newlines). `glyph` and `position` are parallel arrays.
glyph : [dynamic]Glyph,
position : [dynamic]Vec2,
// Indices into `glyph` / `position` of the glyphs that should be drawn.
visible : [dynamic]i32,
// The three arrays below are indexed by position within `visible`, not by glyph index.
atlas_lru_code : [dynamic]Atlas_Key,
region_kind : [dynamic]Atlas_Region_Kind,
bounds : [dynamic]Range2,
// Pen position after the last glyph.
end_cursor_pos : Vec2,
// x: widest line, y: line count * line height.
size : Vec2,
// Font and pixel size the shape was produced with.
font : Font_ID,
px_size : f32,
}
// Ease of use cache, can handle thousands of lookups per frame with ease.
// TODO(Ed) It might perform better with a tailored made hashtable implementation for the LRU_Cache or dedicated array struct/procs for the Shaped_Text.
Shaped_Text_Cache :: struct {
// Backing slots; the LRU maps a Shape_Key to an index into this array.
storage : [dynamic]Shaped_Text,
state : LRU_Cache(Shape_Key),
// Next never-used slot in `storage`; once it reaches the LRU capacity, slots are recycled through eviction.
next_cache_id : i32,
}
// Used by shaper_shape_text_cached, allows user to specify their own proc at compile-time without having to rewrite the caching implementation.
// shaper_shape_harfbuzz and shaper_shape_text_latin both take this parameter list.
// The proc is expected to fill `output` in place.
Shaper_Shape_Text_Uncached_Proc :: #type proc( ctx : ^Shaper_Context,
atlas : Atlas,
glyph_buffer_size : Vec2,
font : Font_ID,
entry : Entry,
font_px_Size : f32,
font_scale : f32,
text_utf8 : string,
output : ^Shaped_Text
)
// Note(Ed): Not used..
// Names the two shaping paths in this file (shaper_shape_text_latin / shaper_shape_harfbuzz).
Shaper_Kind :: enum {
Latin = 0,
Harfbuzz = 1,
}
// Not much here other than just keep track of a harfbuzz var and deciding to keep runtime config here used by the shapers.
Shaper_Context :: struct {
// Scratch harfbuzz buffer, created in shaper_init and reused for every shaping run.
hb_buffer : harfbuzz.Buffer,
// When set, the shapers ceil glyph positions.
snap_glyph_position : b32,
// When abs(font_px_size) is at or below this threshold, the shapers ceil the pen position before placing each glyph.
adv_snap_small_font_threshold : f32,
}
// Only used with harfbuzz for now. Resolved during load_font for a font Entry.
// Created by shaper_load_font in the order blob -> face -> font and destroyed in reverse by shaper_unload_font.
Shaper_Info :: struct {
blob : harfbuzz.Blob,
face : harfbuzz.Face,
font : harfbuzz.Font,
}
// Creates the harfbuzz scratch buffer used by shaper_shape_harfbuzz. Asserts if creation fails.
shaper_init :: proc( ctx : ^Shaper_Context )
{
	buffer       := harfbuzz.buffer_create()
	ctx.hb_buffer = buffer
	assert( buffer != nil, "VEFontCache.shaper_init: Failed to create harfbuzz buffer")
}
// Destroys the harfbuzz scratch buffer if one was created. The handle in `ctx` is not reset.
shaper_shutdown :: proc( ctx : ^Shaper_Context )
{
	if ctx.hb_buffer == nil do return
	harfbuzz.buffer_destroy( ctx.hb_buffer )
}
// Creates the harfbuzz blob, face (index 0) and font for the given font file bytes.
// The blob is created READONLY over `data` with no destroy callback, so `data` is referenced rather
// than copied. `ctx` and `label` are not used. Results are not checked for failure.
shaper_load_font :: #force_inline proc( ctx : ^Shaper_Context, label : string, data : []byte, user_data : rawptr = nil ) -> (info : Shaper_Info)
{
	blob := harfbuzz.blob_create( raw_data(data), cast(c.uint) len(data), harfbuzz.Memory_Mode.READONLY, user_data, nil )
	face := harfbuzz.face_create( blob, 0 )
	font := harfbuzz.font_create( face )

	info.blob = blob
	info.face = face
	info.font = font
	return info
}
// Destroys the harfbuzz objects of a font in reverse creation order (font, face, blob),
// skipping any that are nil. The handles in `info` are not reset.
shaper_unload_font :: #force_inline proc( info : ^Shaper_Info )
{
	if info.font != nil {
		harfbuzz.font_destroy( info.font )
	}
	if info.face != nil {
		harfbuzz.face_destroy( info.face )
	}
	if info.blob != nil {
		harfbuzz.blob_destroy( info.blob )
	}
}
// TODO(Ed): Allow the user to override snap_glyph_position of the shaper context on a per-call basis (as a param)
// Recommended shaper. Very performant.
// TODO(Ed): Would be nice to properly support vertical shaping, right now its strictly just horizontal...
//
// Shapes `text_utf8` with harfbuzz and fills `output` in place:
//  * glyph / position : one entry per shaped glyph (newlines are consumed, not emitted)
//  * visible          : indices of the glyphs that should be drawn
//  * atlas_lru_code / region_kind / bounds : one entry per visible glyph
//  * end_cursor_pos, size, font, px_size
// The text is split into runs at script changes; each run is shaped separately by shape_run.
@(optimization_mode="favor_size")
shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context,
atlas : Atlas,
glyph_buffer_size : Vec2,
font : Font_ID,
entry : Entry,
font_px_size : f32,
font_scale : f32,
text_utf8 : string,
output : ^Shaped_Text
)
{
profile(#procedure)
assert( ctx != nil )
// Previous contents of the per-glyph arrays are discarded; the per-visible arrays are resized further down.
clear( & output.glyph )
clear( & output.position )
clear( & output.visible )
current_script := harfbuzz.Script.UNKNOWN
hb_ucfunc := harfbuzz.unicode_funcs_get_default()
harfbuzz.buffer_clear_contents( ctx.hb_buffer )
ascent := entry.ascent
descent := entry.descent
line_gap := entry.line_gap
max_line_width := f32(0)
line_count := 1
line_height := ((ascent - descent + line_gap) * font_scale)
// Pen position, shared across all runs.
position : Vec2
// Shapes the codepoints currently queued in `buffer` as a single run of `script`,
// appends the resulting glyphs to `output`, advances the shared pen `position`,
// updates `max_line_width` / `line_count`, and finally clears the buffer for the next run.
@(optimization_mode="favor_size")
shape_run :: proc( output : ^Shaped_Text,
entry : Entry,
buffer : harfbuzz.Buffer,
script : harfbuzz.Script,
position : ^Vec2,
max_line_width : ^f32,
line_count : ^int,
font_px_size : f32,
font_scale : f32,
snap_shape_pos : b32,
adv_snap_small_font_threshold : f32
)
{
profile(#procedure)
harfbuzz.buffer_set_script ( buffer, script )
harfbuzz.buffer_set_direction( buffer, harfbuzz.script_get_horizontal_direction( script ))
harfbuzz.buffer_set_language ( buffer, harfbuzz.language_get_default() )
// Perform the actual shaping of this run using HarfBuzz.
harfbuzz.buffer_set_content_type( buffer, harfbuzz.Buffer_Content_Type.UNICODE )
harfbuzz.shape( entry.shaper_info.font, buffer, nil, 0 )
// Loop over glyphs and append to output buffer.
glyph_count : u32
glyph_infos := harfbuzz.buffer_get_glyph_infos( buffer, & glyph_count )
glyph_positions := harfbuzz.buffer_get_glyph_positions( buffer, & glyph_count )
line_height := (entry.ascent - entry.descent + entry.line_gap) * font_scale
// NOTE(review): last_cluster is written below but never read inside this proc.
last_cluster := u32(0)
for index : i32; index < i32(glyph_count); index += 1
{
hb_glyph := glyph_infos [ index ]
hb_gposition := glyph_positions[ index ]
glyph := cast(Glyph) hb_glyph.codepoint
// The caller adds '\n' codepoints with cluster 1 and everything else with cluster 0,
// so a non-zero cluster marks a line break: start a new line and emit no glyph.
if hb_glyph.cluster > 0
{
(max_line_width^) = max( max_line_width^, position.x )
position.x = 0.0
position.y -= line_height
position.y = floor(position.y)
(line_count^) += 1
last_cluster = hb_glyph.cluster
continue
}
// Small fonts: snap the pen itself before placing the glyph.
if abs( font_px_size ) <= adv_snap_small_font_threshold {
(position^) = ceil( position^ )
}
glyph_pos := position^
offset := Vec2 { f32(hb_gposition.x_offset), f32(hb_gposition.y_offset) } * font_scale
glyph_pos += offset
// Optional snapping of the final glyph position only; the pen keeps its unsnapped value.
if snap_shape_pos {
glyph_pos = ceil(glyph_pos)
}
advance := Vec2 {
f32(hb_gposition.x_advance) * font_scale,
f32(hb_gposition.y_advance) * font_scale
}
(position^) += advance
(max_line_width^) = max(max_line_width^, position.x)
// We track all glyphs so that user can use the shape for navigation purposes.
append( & output.glyph, glyph )
append( & output.position, glyph_pos)
// We don't accept all glyphs for rendering, harfbuzz preserves positions of non-visible codepoints (as .notdef glyphs)
// We also double check to make sure the glyph isn't detected for drawing by the parser.
visible_glyph := glyph != 0 && ! parser_is_glyph_empty(entry.parser_info, glyph)
if visible_glyph {
append( & output.visible, cast(i32) len(output.glyph) - 1 )
}
}
output.end_cursor_pos = position^
harfbuzz.buffer_clear_contents( buffer )
}
// Note(Original Author):
// We first start with simple bidi and run logic.
// True CTL is pretty hard and we don't fully support that; patches welcome!
for codepoint, byte_offset in text_utf8
{
hb_codepoint := cast(harfbuzz.Codepoint) codepoint
script := harfbuzz.unicode_script( hb_ucfunc, hb_codepoint )
// Can we continue the current run?
ScriptKind :: harfbuzz.Script
// These scripts don't break runs because they don't represent script transitions - they adapt to their context.
// Maintaining the current shaping run for these scripts ensures correct processing of marks, numbers,
// and punctuation within the primary text flow.
is_neutral_script := script == ScriptKind.UNKNOWN || script == ScriptKind.INHERITED || script == ScriptKind.COMMON
// Essentially if the script is neutral, or the same as current,
// or this is the first codepoint: add it to the buffer and continue the loop.
if is_neutral_script \
|| script == current_script \
|| byte_offset == 0
{
// The cluster value is used purely as a newline flag (1 for '\n', 0 otherwise); shape_run checks it.
harfbuzz.buffer_add( ctx.hb_buffer, hb_codepoint, codepoint == '\n' ? 1 : 0 )
current_script = is_neutral_script ? current_script : script
continue
}
// End current run since we've encountered a significant script change.
shape_run( output,
entry,
ctx.hb_buffer,
current_script,
& position,
& max_line_width,
& line_count,
font_px_size,
font_scale,
ctx.snap_glyph_position,
ctx.adv_snap_small_font_threshold
)
// The codepoint that triggered the script change becomes the first entry of the next run.
harfbuzz.buffer_add( ctx.hb_buffer, hb_codepoint, codepoint == '\n' ? 1 : 0 )
current_script = script
}
// End the last run if needed
shape_run( output,
entry,
ctx.hb_buffer,
current_script,
& position,
& max_line_width,
& line_count,
font_px_size,
font_scale,
ctx.snap_glyph_position,
ctx.adv_snap_small_font_threshold
)
// Set the final size
output.size.x = max_line_width
output.size.y = f32(line_count) * line_height
// Resolve each glyphs: bounds, atlas lru, and the atlas region as we have everything we need now.
resize( & output.atlas_lru_code, len(output.visible) )
resize( & output.region_kind, len(output.visible) )
resize( & output.bounds, len(output.visible) )
profile_begin("atlas_lru_code")
for vis_id, index in output.visible {
glyph_id := output.glyph[vis_id]
output.atlas_lru_code[index] = atlas_glyph_lru_code(entry.id, font_px_size, glyph_id)
// atlas_lru_code is 1:1 with visible index
}
profile_end()
profile_begin("bounds & region")
for vis_id, index in output.visible {
glyph_id := output.glyph[vis_id]
bounds := & output.bounds[index]
// Bounds are stored unscaled; only the size used for region selection is scaled.
(bounds ^) = parser_get_bounds( entry.parser_info, glyph_id )
bounds_size_scaled := (bounds.p1 - bounds.p0) * font_scale
output.region_kind[index] = atlas_decide_region( atlas, glyph_buffer_size, bounds_size_scaled )
// bounds & region_kind are 1:1 with visible index
}
profile_end()
output.font = font
output.px_size = font_px_size
return
}
// TODO(Ed): Allow the user to override snap_glyph_position of the shaper context on a per-call basis (as an param)
// Basic western alphabet based shaping. Not that much faster than harfbuzz if at all.
//
// Shapes `text_utf8` codepoint by codepoint using only the font parser (kerning + horizontal advance)
// and fills `output` in place:
//  * glyph / position : one entry per codepoint other than '\n'
//  * visible          : indices of the glyphs the parser does not report as empty
//  * atlas_lru_code / region_kind / bounds : one entry per visible glyph
//  * end_cursor_pos, size, font, px_size
// '\n' starts a new line: x resets to 0, y moves down by one line height, kerning state is reset.
shaper_shape_text_latin :: proc( ctx : ^Shaper_Context,
	atlas             : Atlas,
	glyph_buffer_size : Vec2,
	font              : Font_ID,
	entry             : Entry,
	font_px_size      : f32,
	font_scale        : f32,
	text_utf8         : string,
	output            : ^Shaped_Text
)
{
	profile(#procedure)
	assert( ctx != nil )
	clear( & output.glyph )
	clear( & output.position )
	clear( & output.visible )

	line_height    := (entry.ascent - entry.descent + entry.line_gap) * font_scale
	line_count     : int = 1
	max_line_width : f32 = 0
	position       : Vec2
	prev_codepoint : rune
	for codepoint in text_utf8
	{
		// Kerning against the previous codepoint on the same line.
		if prev_codepoint > 0 {
			kern := parser_get_codepoint_kern_advance( entry.parser_info, prev_codepoint, codepoint )
			position.x += f32(kern) * font_scale
		}
		if codepoint == '\n'
		{
			line_count    += 1
			max_line_width = max(max_line_width, position.x)
			position.x     = 0.0
			position.y    -= line_height
			// No kerning across a line break.
			prev_codepoint = rune(0)
			continue
		}
		// Small fonts: snap the pen's x before placing the glyph.
		if abs( font_px_size ) <= ctx.adv_snap_small_font_threshold {
			position.x = ceil(position.x)
		}

		glyph_index    := parser_find_glyph_index( entry.parser_info, codepoint )
		is_glyph_empty := parser_is_glyph_empty( entry.parser_info, glyph_index )

		// Unlike the harfbuzz path, snapping here modifies the pen itself.
		if ctx.snap_glyph_position {
			position.x = ceil(position.x)
			position.y = ceil(position.y)
		}

		// All glyphs are tracked; only non-empty ones are marked visible.
		append( & output.glyph, glyph_index)
		append( & output.position, position)
		if ! is_glyph_empty {
			append( & output.visible, cast(i32) len(output.glyph) - 1 )
		}

		advance, _ := parser_get_codepoint_horizontal_metrics( entry.parser_info, codepoint )
		position.x += f32(advance) * font_scale
		prev_codepoint = codepoint
	}

	output.end_cursor_pos = position
	max_line_width = max(max_line_width, position.x)

	output.size.x = max_line_width
	output.size.y = f32(line_count) * line_height

	// Resolve each glyphs: bounds, atlas lru, and the atlas region as we have everything we need now.
	// These arrays are indexed by visible index, so they are sized by the visible count
	// (same as shaper_shape_harfbuzz).
	resize( & output.atlas_lru_code, len(output.visible) )
	resize( & output.region_kind,    len(output.visible) )
	resize( & output.bounds,         len(output.visible) )

	profile_begin("atlas_lru_code")
	for vis_id, index in output.visible {
		glyph_id := output.glyph[vis_id]
		output.atlas_lru_code[index] = atlas_glyph_lru_code(entry.id, font_px_size, glyph_id)
		// atlas_lru_code is 1:1 with visible index
	}
	profile_end()

	profile_begin("bounds & region")
	for vis_id, index in output.visible {
		glyph_id := output.glyph[vis_id]
		bounds   := & output.bounds[index]
		// Bounds are stored unscaled; only the size used for region selection is scaled.
		(bounds ^) = parser_get_bounds( entry.parser_info, glyph_id )
		bounds_size_scaled := (bounds.p1 - bounds.p0) * font_scale
		output.region_kind[index] = atlas_decide_region( atlas, glyph_buffer_size, bounds_size_scaled )
		// bounds & region_kind are 1:1 with visible index
	}
	profile_end()

	output.font    = font
	output.px_size = font_px_size
}
// Shapes are tracked by the library's context using the shape cache
// and the key is resolved using the font, the desired pixel size, and the text bytes to be shaped.
// Thus this procedure's cost will be proportional to how much text it has to sift through.
// djb8_hash is used as it's been pretty good for thousands of hashed lines that are around 6-250 characters long
// (and it's very fast).
//
// On a cache hit the stored Shaped_Text is returned as-is. On a miss a storage slot is claimed
// (a fresh one while the cache is not yet full, otherwise the slot of the entry the LRU would evict next),
// `shape_text_uncached` shapes the text into that slot, and the slot's contents are returned.
// The returned value is a struct copy: its dynamic arrays still point at the cache's storage,
// so it is only valid until that slot is reshaped.
// NOTE(review): lookups are keyed by the 32-bit hash alone; two different (font, size, text)
// inputs that hash to the same key would be served the same cached shape.
@(optimization_mode="favor_size")
shaper_shape_text_cached :: proc( text_utf8 : string,
	ctx                 : ^Shaper_Context,
	shape_cache         : ^Shaped_Text_Cache,
	atlas               : Atlas,
	glyph_buffer_size   : Vec2,
	font                : Font_ID,
	entry               : Entry,
	font_px_size        : f32,
	font_scale          : f32,
	shape_text_uncached : $Shaper_Shape_Text_Uncached_Proc
) -> (shaped_text : Shaped_Text)
{
	profile(#procedure)
	// Local copies so the parameters are addressable for hashing.
	font         := font
	font_px_size := font_px_size
	font_bytes   := to_bytes( & font )
	size_bytes   := to_bytes( & font_px_size )
	text_bytes   := transmute( []byte) text_utf8

	lru_code : Shape_Key
	djb8_hash( & lru_code, font_bytes )
	djb8_hash( & lru_code, size_bytes )
	djb8_hash( & lru_code, text_bytes )

	state           := & shape_cache.state
	shape_cache_idx := lru_get( state, lru_code )

	// Cache hit: hand back the stored shape.
	if shape_cache_idx != -1 {
		shaped_text = shape_cache.storage[ shape_cache_idx ]
		return
	}

	if shape_cache.next_cache_id < i32(state.capacity) {
		// Cache not full yet: claim the next unused storage slot.
		shape_cache_idx = shape_cache.next_cache_id
		shape_cache.next_cache_id += 1
		lru_put( state, lru_code, shape_cache_idx )
	}
	else
	{
		// Cache full: reuse the storage slot of the entry that is next in line for eviction.
		next_evict_idx := lru_get_next_evicted( state ^ )
		assert( next_evict_idx != LRU_Fail_Mask_32 )

		shape_cache_idx = lru_peek( state ^, next_evict_idx, must_find = true )
		assert( shape_cache_idx != - 1 )
		lru_put( state, lru_code, shape_cache_idx )
	}

	storage_entry := & shape_cache.storage[ shape_cache_idx ]
	shape_text_uncached( ctx, atlas, glyph_buffer_size, font, entry, font_px_size, font_scale, text_utf8, storage_entry )

	shaped_text = storage_entry ^
	return
}

File diff suppressed because it is too large Load Diff