render/d3d11: adjust the allowed frame 'latency' (maximum frames in queue) based on the number of active windows, so that we do not block unnecessarily when there are two windows to build frames for; demon/linux: sketch out the register-writing path, and ensure the register read/write paths have slots for other architectures

This commit is contained in:
Ryan Fleury
2025-07-31 14:09:42 -07:00
parent ad735dab5e
commit 1a19bf9663
4 changed files with 203 additions and 156 deletions
+197 -156
View File
@@ -536,166 +536,200 @@ internal B32
dmn_lnx_thread_read_reg_block(DMN_LNX_Entity *thread, void *reg_block)
{
B32 result = 0;
switch(thread->arch)
{
REGS_RegBlockX64 *dst = (REGS_RegBlockX64 *)reg_block;
pid_t tid = (pid_t)thread->id;
case Arch_Null:
case Arch_COUNT:{}break;
case Arch_x86:
case Arch_arm64:
case Arch_arm32:
{NotImplemented;}break;
//- rjf: read GPR
B32 got_gpr = 0;
//- rjf: [x64]
case Arch_x64:
{
DMN_LNX_UserX64 ctx = {0};
struct iovec iov_gpr = {0};
iov_gpr.iov_len = sizeof(ctx);
iov_gpr.iov_base = &ctx;
if(ptrace(PTRACE_GETREGSET, tid, (void*)NT_PRSTATUS, &iov_gpr) != -1)
REGS_RegBlockX64 *dst = (REGS_RegBlockX64 *)reg_block;
pid_t tid = (pid_t)thread->id;
//- rjf: read GPR
B32 got_gpr = 0;
{
got_gpr = 1;
DMN_LNX_UserRegsX64 *src = &ctx.regs;
dst->rax.u64 = src->rax;
dst->rcx.u64 = src->rcx;
dst->rdx.u64 = src->rdx;
dst->rbx.u64 = src->rbx;
dst->rsp.u64 = src->rsp;
dst->rbp.u64 = src->rbp;
dst->rsi.u64 = src->rsi;
dst->rdi.u64 = src->rdi;
dst->r8.u64 = src->r8;
dst->r9.u64 = src->r9;
dst->r10.u64 = src->r10;
dst->r11.u64 = src->r11;
dst->r12.u64 = src->r12;
dst->r13.u64 = src->r13;
dst->r14.u64 = src->r14;
dst->r15.u64 = src->r15;
dst->cs.u16 = src->cs;
dst->ds.u16 = src->ds;
dst->es.u16 = src->es;
dst->fs.u16 = src->fs;
dst->gs.u16 = src->gs;
dst->ss.u16 = src->ss;
dst->fsbase.u64 = src->fsbase;
dst->gsbase.u64 = src->gsbase;
dst->rip.u64 = src->rip;
dst->rflags.u64 = src->rflags;
DMN_LNX_UserX64 ctx = {0};
struct iovec iov_gpr = {0};
iov_gpr.iov_len = sizeof(ctx);
iov_gpr.iov_base = &ctx;
if(ptrace(PTRACE_GETREGSET, tid, (void*)NT_PRSTATUS, &iov_gpr) != -1)
{
got_gpr = 1;
DMN_LNX_UserRegsX64 *src = &ctx.regs;
dst->rax.u64 = src->rax;
dst->rcx.u64 = src->rcx;
dst->rdx.u64 = src->rdx;
dst->rbx.u64 = src->rbx;
dst->rsp.u64 = src->rsp;
dst->rbp.u64 = src->rbp;
dst->rsi.u64 = src->rsi;
dst->rdi.u64 = src->rdi;
dst->r8.u64 = src->r8;
dst->r9.u64 = src->r9;
dst->r10.u64 = src->r10;
dst->r11.u64 = src->r11;
dst->r12.u64 = src->r12;
dst->r13.u64 = src->r13;
dst->r14.u64 = src->r14;
dst->r15.u64 = src->r15;
dst->cs.u16 = src->cs;
dst->ds.u16 = src->ds;
dst->es.u16 = src->es;
dst->fs.u16 = src->fs;
dst->gs.u16 = src->gs;
dst->ss.u16 = src->ss;
dst->fsbase.u64 = src->fsbase;
dst->gsbase.u64 = src->gsbase;
dst->rip.u64 = src->rip;
dst->rflags.u64 = src->rflags;
}
}
}
//- rjf: read FPR
B32 got_fpr = 0;
if(got_gpr)
{
Temp scratch = scratch_begin(0, 0);
DMN_LNX_XSave *xsave = 0;
DMN_LNX_XSaveLegacy *xsave_legacy = 0;
// rjf: try xsave
if(!xsave_legacy)
{
U8 xsave_buffer[KB(4)];
struct iovec iov_xsave = {0};
iov_xsave.iov_len = sizeof(xsave_buffer);
iov_xsave.iov_base = xsave_buffer;
if(ptrace(PTRACE_GETREGSET, tid, (void*)NT_X86_XSTATE, &iov_xsave) != -1)
{
xsave = push_array_no_zero(scratch.arena, DMN_LNX_XSave, 1);
MemoryCopy(xsave, xsave_buffer, sizeof(*xsave));
xsave_legacy = &xsave->legacy;
}
}
// rjf: try fxsave
if(!xsave_legacy)
{
DMN_LNX_XSaveLegacy fxsave = {0};
struct iovec iov_fxsave = {0};
iov_fxsave.iov_len = sizeof(fxsave);
iov_fxsave.iov_base = &fxsave;
if(ptrace(PTRACE_GETREGSET, tid, (void *)NT_FPREGSET, &iov_fxsave) != -1)
{
xsave_legacy = push_array_no_zero(scratch.arena, DMN_LNX_XSaveLegacy, 1);
MemoryCopy(xsave_legacy, &fxsave, sizeof(*xsave_legacy));
}
}
// rjf: fill from xsave legacy
if(xsave_legacy)
{
DMN_LNX_XSaveLegacy *src = xsave_legacy;
dst->fcw.u16 = src->fcw;
dst->fsw.u16 = src->fsw;
dst->ftw.u16 = src->ftw; // TODO(rjf): old: fix tag word (?)
dst->fop.u16 = src->fop;
dst->fip.u64 = src->b64.fip;
// TODO(rjf): these 16-bit registers do not belong in x64
dst->fcs.u16 = 0;
dst->fdp.u64 = src->b64.fdp;
dst->fds.u16 = 0;
dst->mxcsr.u32 = src->mxcsr;
dst->mxcsr_mask.u32 = src->mxcsr_mask;
{
U8 *float_s = src->st_space.u8;
REGS_Reg80 *float_d = &dst->st0;
for(U32 n = 0; n < 8; n += 1, float_s += 16, float_d += 1)
{
MemoryCopy(float_d, float_s, sizeof(*float_d));
}
}
{
U8 *xmm_s = src->xmm_space.u8;
REGS_Reg512 *xmm_d = &dst->zmm0;
for(U32 n = 0; n < 16; n += 1, xmm_s += 16, xmm_d += 1)
{
MemoryCopy(xmm_d, xmm_s, 16);
}
}
}
// rjf: fill from ymm registers
// TODO(rjf): this is a lie; ymm can technically move around. study & fix.
if(xsave)
{
B32 has_ymm_registers = ((xsave->header.xstate_bv & 4) != 0);
if(has_ymm_registers)
{
U8 *ymm_s = (U8 *)xsave->ymmh;
REGS_Reg512 *ymm_d = &dst->zmm0;
for(U32 n = 0; n < 16; n += 1, ymm_s += 16, ymm_d += 1)
{
MemoryCopy(((U8*)ymm_d) + 16, ymm_s, 16);
}
}
}
got_fpr = (xsave || xsave_legacy);
scratch_end(scratch);
}
//- rjf: read debug registers
B32 got_debug = 0;
if(got_fpr)
{
got_debug = 1;
REGS_Reg64 *dr_d = &dst->dr0;
for(U32 i = 0; i < 8; i += 1, dr_d += 1)
{
if(i != 4 && i != 5)
{
U64 offset = OffsetOf(DMN_LNX_UserX64, u_debugreg[i]);
errno = 0;
int peek_result = ptrace(PTRACE_PEEKUSER, tid, PtrFromInt(offset), 0);
if(errno == 0)
{
dr_d->u64 = (U64)peek_result;
}
else
{
got_debug = 0;
}
}
}
}
result = got_debug;
}break;
}
return result;
}
// Writes the register state in `reg_block` to `thread`.
//
// This is currently only a sketch of the write path: it dispatches on
// `thread->arch` so that every architecture has a slot, but no architecture
// actually writes registers yet.
//
// thread:    entity whose `arch` selects the register layout.
// reg_block: source register block (not read yet).
//
// Returns 1 on success, 0 on failure. Because nothing is implemented, the
// Arch_Null, Arch_COUNT, and Arch_x64 paths all return 0; the remaining
// architectures hit `NotImplemented`.
internal B32
dmn_lnx_thread_write_reg_block(DMN_LNX_Entity *thread, void *reg_block)
{
  B32 result = 0;
  switch(thread->arch)
  {
    case Arch_Null:
    case Arch_COUNT:{}break;
    case Arch_x86:
    case Arch_arm64:
    case Arch_arm32:
    {NotImplemented;}break;
    
    //- rjf: [x64]
    case Arch_x64:
    {
      // TODO: write GPR, FPR, and debug registers from `reg_block` to the
      // thread. The read-path code must not be reused here as-is: it copies
      // from the thread *into* the register block, which is the opposite
      // direction.
    }break;
  }
  return result;
}
@@ -1223,9 +1257,10 @@ dmn_ctrl_run(Arena *arena, DMN_CtrlCtx *ctx, DMN_RunCtrls *ctrls)
// rjf: push event
DMN_Event *e = dmn_event_list_push(arena, &evts);
e->kind = e_kind;
e->process = dmn_lnx_handle_from_entity(process);
e->thread = dmn_lnx_handle_from_entity(thread);
e->kind = e_kind;
e->process = dmn_lnx_handle_from_entity(process);
e->thread = dmn_lnx_handle_from_entity(thread);
e->instruction_pointer = rip;
}
//- rjf: WSTOPSIG(status) is SIGSTOP
@@ -1473,7 +1508,13 @@ dmn_thread_read_reg_block(DMN_Handle handle, void *reg_block)
// Writes `reg_block` to the thread identified by `handle`.
//
// Resolves the handle to its Linux entity inside a DMN_AccessScope and
// forwards to dmn_lnx_thread_write_reg_block, returning that function's
// result (0 if nothing was written).
internal B32
dmn_thread_write_reg_block(DMN_Handle handle, void *reg_block)
{
  B32 result = 0;
  // NOTE(review): DMN_AccessScope presumably guards access to demon entity
  // state for the duration of the block -- confirm against its definition.
  DMN_AccessScope
  {
    DMN_LNX_Entity *thread = dmn_lnx_entity_from_handle(handle);
    result = dmn_lnx_thread_write_reg_block(thread, reg_block);
  }
  return result;
}
//- rjf: system process listing
+1
View File
@@ -331,5 +331,6 @@ internal DMN_Handle dmn_lnx_handle_from_entity(DMN_LNX_Entity *entity);
internal DMN_LNX_Entity *dmn_lnx_entity_from_handle(DMN_Handle handle);
internal DMN_LNX_Entity *dmn_lnx_thread_from_pid(pid_t pid);
internal B32 dmn_lnx_thread_read_reg_block(DMN_LNX_Entity *thread, void *reg_block);
internal B32 dmn_lnx_thread_write_reg_block(DMN_LNX_Entity *thread, void *reg_block);
#endif // DEMON_CORE_LINUX_H
+4
View File
@@ -548,6 +548,8 @@ r_window_equip(OS_Handle handle)
r_d3d11_state->device->lpVtbl->CreateRenderTargetView(r_d3d11_state->device, (ID3D11Resource *)window->framebuffer, &framebuffer_rtv_desc, &window->framebuffer_rtv);
result = r_d3d11_handle_from_window(window);
r_d3d11_state->window_count += 1;
r_d3d11_state->dxgi_device->lpVtbl->SetMaximumFrameLatency(r_d3d11_state->dxgi_device, Clamp(1, r_d3d11_state->window_count, 16));
}
ProfEnd();
return result;
@@ -571,6 +573,8 @@ r_window_unequip(OS_Handle handle, R_Handle equip_handle)
window->swapchain->lpVtbl->Release(window->swapchain);
window->generation += 1;
SLLStackPush(r_d3d11_state->first_free_window, window);
r_d3d11_state->window_count -= 1;
r_d3d11_state->dxgi_device->lpVtbl->SetMaximumFrameLatency(r_d3d11_state->dxgi_device, Clamp(1, r_d3d11_state->window_count, 16));
}
ProfEnd();
}
+1
View File
@@ -129,6 +129,7 @@ struct R_D3D11_State
{
// rjf: state
Arena *arena;
U64 window_count;
R_D3D11_Window *first_free_window;
R_D3D11_Tex2D *first_free_tex2d;
R_D3D11_Buffer *first_free_buffer;