checkpoint on moving all of radbin to wavefront-style, first step to going wide on rdi baking as well

This commit is contained in:
Ryan Fleury
2025-08-26 08:38:13 -07:00
parent d8bb8c76c6
commit 69d0da9586
8 changed files with 150 additions and 126 deletions
+3 -1
View File
@@ -75,11 +75,13 @@ tctx_get_scratch(Arena **conflicts, U64 count)
//- rjf: lane metadata
internal void
// Replaces the lane context stored on the currently selected thread context
// (as returned by tctx_selected()) with `lane_ctx`.
//
// Returns the lane context that was stored before this call, so a caller can
// hand it back to this function later to put the previous value back.
internal LaneCtx
tctx_set_lane_ctx(LaneCtx lane_ctx)
{
TCTX *tctx = tctx_selected();
// capture the old value before overwriting it; this is the return value
LaneCtx restore = tctx->lane_ctx;
tctx->lane_ctx = lane_ctx;
return restore;
}
internal void
+1 -1
View File
@@ -51,7 +51,7 @@ internal Arena *tctx_get_scratch(Arena **conflicts, U64 count);
#define scratch_end(scratch) temp_end(scratch)
//- rjf: lane metadata
internal void tctx_set_lane_ctx(LaneCtx lane_ctx);
internal LaneCtx tctx_set_lane_ctx(LaneCtx lane_ctx);
internal void tctx_lane_barrier_wait(void);
internal Rng1U64 tctx_lane_idx_range_from_count(U64 count);
#define lane_idx() (tctx_selected()->lane_ctx.lane_idx)
+99 -63
View File
@@ -7,22 +7,56 @@
#include "radbin/generated/radbin.meta.c"
////////////////////////////////
//~ rjf: Top-Level Entry Point
//~ rjf: Top-Level Entry Points
// Top-level entry point: launches one worker thread per logical processor
// reported by os_get_system_info(), each running rb_thread_entry_point with
// the same command line and its own lane context (lane index, lane count,
// and a barrier shared by all lanes), then waits for every thread to finish.
//
// cmdline: parsed command line; the same pointer is handed to every thread.
internal void
rb_entry_point(CmdLine *cmdline)
{
  Temp scratch = scratch_begin(0, 0);

  //- set up per-thread storage & the barrier shared by all lanes
  U64 threads_count = os_get_system_info()->logical_processor_count;
  OS_Handle *threads = push_array(scratch.arena, OS_Handle, threads_count);
  RB_ThreadParams *threads_params = push_array(scratch.arena, RB_ThreadParams, threads_count);
  Barrier barrier = barrier_alloc(threads_count);

  //- launch one thread per lane
  // (parameters live in the scratch arena, which is not released until after
  // all threads have been joined below)
  for EachIndex(idx, threads_count)
  {
    threads_params[idx].cmdline = cmdline;
    threads_params[idx].lane_ctx.lane_idx = idx;
    threads_params[idx].lane_ctx.lane_count = threads_count;
    threads_params[idx].lane_ctx.barrier = barrier;
    threads[idx] = os_thread_launch(rb_thread_entry_point, &threads_params[idx], 0);
  }

  //- wait for every lane to finish
  for EachIndex(idx, threads_count)
  {
    os_thread_join(threads[idx], max_U64);
  }

  //- every lane has been joined, so nothing can still be waiting on the
  // barrier; release it to pair with barrier_alloc above
  barrier_release(barrier);
  scratch_end(scratch);
}
internal void
rb_thread_entry_point(void *p)
{
RB_ThreadParams *params = (RB_ThreadParams *)p;
CmdLine *cmdline = params->cmdline;
LaneCtx lctx = params->lane_ctx;
lane_ctx(lctx);
Arena *arena = arena_alloc();
ASYNC_Root *async_root = async_root_alloc();
Log *log = log_alloc();
log_select(log);
log_scope_begin();
//////////////////////////////
//- rjf: set up shared state
//
if(lane_idx() == 0)
{
rb_shared = push_array(arena, RB_Shared, 1);
}
lane_sync();
//////////////////////////////
//- rjf: analyze & load command line input files
//
RB_FileList input_files = {0};
ProfScope("analyze & load command line input files")
ProfScope("analyze & load command line input files") if(lane_idx() == 0)
{
String8List input_file_path_tasks = str8_list_copy(arena, &cmdline->inputs);
for(String8Node *n = input_file_path_tasks.first; n != 0; n = n->next)
@@ -318,23 +352,29 @@ rb_entry_point(CmdLine *cmdline)
f->data = file_data;
RB_FileNode *file_n = push_array(arena, RB_FileNode, 1);
file_n->v = f;
SLLQueuePush(input_files.first, input_files.last, file_n);
input_files.count += 1;
SLLQueuePush(rb_shared->input_files.first, rb_shared->input_files.last, file_n);
rb_shared->input_files.count += 1;
}
}
}
lane_sync();
RB_FileList input_files = rb_shared->input_files;
//////////////////////////////
//- rjf: bucket input files by format
//
RB_FileList input_files_from_format_table[RB_FileFormat_COUNT] = {0};
for(RB_FileNode *n = input_files.first; n != 0; n = n->next)
ProfScope("bucket input files by format") if(lane_idx() == 0)
{
RB_FileNode *file_n = push_array(arena, RB_FileNode, 1);
file_n->v = n->v;
SLLQueuePush(input_files_from_format_table[n->v->format].first, input_files_from_format_table[n->v->format].last, file_n);
input_files_from_format_table[n->v->format].count += 1;
for(RB_FileNode *n = input_files.first; n != 0; n = n->next)
{
RB_FileNode *file_n = push_array(arena, RB_FileNode, 1);
file_n->v = n->v;
SLLQueuePush(rb_shared->input_files_from_format_table[n->v->format].first, rb_shared->input_files_from_format_table[n->v->format].last, file_n);
rb_shared->input_files_from_format_table[n->v->format].count += 1;
}
}
lane_sync();
RB_FileList *input_files_from_format_table = rb_shared->input_files_from_format_table;
//////////////////////////////
//- rjf: unpack which kind of output we're producing, and to where
@@ -400,17 +440,21 @@ rb_entry_point(CmdLine *cmdline)
//////////////////////////////
//- rjf: print help preamble
//
if(output_kind == OutputKind_Null || cmdline->inputs.node_count == 0)
if(lane_idx() == 0)
{
fprintf(stderr, "%s\n", BUILD_TITLE);
fprintf(stderr, "%s\n\n", BUILD_VERSION_STRING_LITERAL);
if(output_kind != OutputKind_Null)
if(output_kind == OutputKind_Null || cmdline->inputs.node_count == 0)
{
fprintf(stderr, "%.*s Help\n", str8_varg(output_kind_info[output_kind].title));
fprintf(stderr, "To see top-level options for radbin, run the binary with no arguments.\n\n");
fprintf(stderr, "%s\n", BUILD_TITLE);
fprintf(stderr, "%s\n\n", BUILD_VERSION_STRING_LITERAL);
if(output_kind != OutputKind_Null)
{
fprintf(stderr, "%.*s Help\n", str8_varg(output_kind_info[output_kind].title));
fprintf(stderr, "To see top-level options for radbin, run the binary with no arguments.\n\n");
}
fprintf(stderr, "-------------------------------------------------------------------------------\n\n");
}
fprintf(stderr, "-------------------------------------------------------------------------------\n\n");
}
lane_sync();
//////////////////////////////
//- rjf: perform operation based on output kind
@@ -423,6 +467,7 @@ rb_entry_point(CmdLine *cmdline)
//
default:
case OutputKind_Null:
if(lane_idx() == 0)
{
fprintf(stderr, "USAGE EXAMPLES\n\n");
@@ -479,7 +524,7 @@ rb_entry_point(CmdLine *cmdline)
case OutputKind_Breakpad:
{
//- rjf: no inputs => help
if(cmdline->inputs.node_count == 0) switch(output_kind)
if(lane_idx() == 0 && cmdline->inputs.node_count == 0) switch(output_kind)
{
default:
case OutputKind_RDI:
@@ -638,7 +683,7 @@ rb_entry_point(CmdLine *cmdline)
convert_params.subset_flags = subset_flags;
convert_params.deterministic = cmd_line_has_flag(cmdline, str8_lit("deterministic"));
}
ProfScope("convert") bake_params = d2r_convert(arena, async_root, &convert_params);
ProfScope("convert") bake_params = d2r_convert(arena, &convert_params);
// rjf: no output path? -> pick one based on debug
if(output_path.size == 0)
@@ -672,23 +717,7 @@ rb_entry_point(CmdLine *cmdline)
convert_params.subset_flags = subset_flags;
convert_params.deterministic = cmd_line_has_flag(cmdline, str8_lit("deterministic"));
}
if(cmd_line_has_flag(cmdline, str8_lit("p2r2")))
{
ProfScope("convert (2)")
{
U64 thread_count = os_get_system_info()->logical_processor_count;
Arena **thread_arenas = push_array(arena, Arena *, thread_count);
for EachIndex(idx, thread_count)
{
thread_arenas[idx] = arena_alloc();
}
bake_params = p2r2_convert(thread_arenas, thread_count, &convert_params);
}
}
else
{
ProfScope("convert") bake_params = p2r_convert(arena, async_root, &convert_params);
}
bake_params = p2r2_convert(arena, &convert_params);
// rjf: no output path? -> pick one based on PDB
if(output_path.size == 0) switch(output_kind)
@@ -838,7 +867,7 @@ rb_entry_point(CmdLine *cmdline)
B32 deterministic = cmd_line_has_flag(cmdline, str8_lit("deterministic"));
//- rjf: no inputs => help
if(cmdline->inputs.node_count == 0)
if(lane_idx() == 0 && cmdline->inputs.node_count == 0)
{
fprintf(stderr, "All input files specified on the command line will be dumped. Currently, only\n");
fprintf(stderr, "RDI files are supported.\n\n");
@@ -1028,43 +1057,50 @@ rb_entry_point(CmdLine *cmdline)
//////////////////////////////
//- rjf: write outputs
//
if(output_path.size != 0) ProfScope("write outputs [file]")
if(lane_idx() == 0)
{
os_write_data_list_to_file_path(output_path, output_blobs);
log_infof("Results written to %S", output_path);
}
else ProfScope("write outputs [stdout]")
{
for(String8Node *n = output_blobs.first; n != 0; n = n->next)
if(output_path.size != 0) ProfScope("write outputs [file]")
{
for(U64 off = 0; off < n->string.size;)
{
U64 size_to_write = Min(n->string.size - off, GB(2));
fwrite(n->string.str + off, size_to_write, 1, stdout);
off += size_to_write;
}
os_write_data_list_to_file_path(output_path, output_blobs);
log_infof("Results written to %S", output_path);
}
else ProfScope("write outputs [stdout]")
{
for(String8Node *n = output_blobs.first; n != 0; n = n->next)
{
for(U64 off = 0; off < n->string.size;)
{
U64 size_to_write = Min(n->string.size - off, GB(2));
fwrite(n->string.str + off, size_to_write, 1, stdout);
off += size_to_write;
}
}
log_info(str8_lit("Results written to stdout"));
}
log_info(str8_lit("Results written to stdout"));
}
lane_sync();
//////////////////////////////
//- rjf: write info & errors
//
LogScopeResult log_scope = log_scope_end(arena);
if(cmd_line_has_flag(cmdline, str8_lit("verbose")) && log_scope.strings[LogMsgKind_Info].size != 0)
if(lane_idx() == 0)
{
String8List lines = wrapped_lines_from_string(arena, log_scope.strings[LogMsgKind_Info], 80, 80, 0);
for(String8Node *n = lines.first; n != 0; n = n->next)
if(cmd_line_has_flag(cmdline, str8_lit("verbose")) && log_scope.strings[LogMsgKind_Info].size != 0)
{
fprintf(stderr, "%.*s\n", str8_varg(n->string));
String8List lines = wrapped_lines_from_string(arena, log_scope.strings[LogMsgKind_Info], 80, 80, 0);
for(String8Node *n = lines.first; n != 0; n = n->next)
{
fprintf(stderr, "%.*s\n", str8_varg(n->string));
}
}
}
if(log_scope.strings[LogMsgKind_UserError].size != 0)
{
String8List lines = wrapped_lines_from_string(arena, log_scope.strings[LogMsgKind_UserError], 80, 80, 0);
for(String8Node *n = lines.first; n != 0; n = n->next)
if(log_scope.strings[LogMsgKind_UserError].size != 0)
{
fprintf(stderr, "%.*s\n", str8_varg(n->string));
String8List lines = wrapped_lines_from_string(arena, log_scope.strings[LogMsgKind_UserError], 80, 80, 0);
for(String8Node *n = lines.first; n != 0; n = n->next)
{
fprintf(stderr, "%.*s\n", str8_varg(n->string));
}
}
}
}
+27 -1
View File
@@ -9,6 +9,16 @@
#include "radbin/generated/radbin.meta.h"
////////////////////////////////
//~ rjf: Thread Parameters
// Per-thread parameter bundle; a pointer to one of these is passed as the
// opaque argument when a thread is launched on rb_thread_entry_point.
typedef struct RB_ThreadParams RB_ThreadParams;
struct RB_ThreadParams
{
// command line to process; rb_entry_point gives every thread the same pointer
CmdLine *cmdline;
// this thread's lane context (lane index, lane count, and barrier)
LaneCtx lane_ctx;
};
////////////////////////////////
//~ rjf: File Types
@@ -46,8 +56,24 @@ read_only global RB_File rb_file_nil = {0};
#define rb_file_list_first(list) ((list)->first ? (list)->first->v : &rb_file_nil)
////////////////////////////////
//~ rjf: Top-Level Entry Point
//~ rjf: Cross-Thread State
// State shared by all lanes, reached through the `rb_shared` global. In
// rb_thread_entry_point, lane 0 fills these lists and the other lanes read
// them only after a lane_sync().
typedef struct RB_Shared RB_Shared;
struct RB_Shared
{
// all input files loaded from the command line inputs
RB_FileList input_files;
// the same files bucketed by format; indexed by each file's `format` value
RB_FileList input_files_from_format_table[RB_FileFormat_COUNT];
};
////////////////////////////////
//~ rjf: Globals
// Pointer to the cross-thread state; starts null and is assigned by lane 0 in
// rb_thread_entry_point before the first lane_sync().
global RB_Shared *rb_shared = 0;
////////////////////////////////
//~ rjf: Top-Level Entry Points
// Launches one thread per logical processor on rb_thread_entry_point and joins them all.
internal void rb_entry_point(CmdLine *cmdline);
// Per-thread entry point; `p` must point to an RB_ThreadParams.
internal void rb_thread_entry_point(void *p);
#endif //RADBIN_H
+1 -1
View File
@@ -1123,7 +1123,7 @@ d2r_push_scope(Arena *arena, RDIM_ScopeChunkList *scopes, U64 scope_chunk_cap, D
//~ rjf: Main Conversion Entry Point
internal RDIM_BakeParams
d2r_convert(Arena *arena, ASYNC_Root *async_root, D2R_ConvertParams *params)
d2r_convert(Arena *arena, D2R_ConvertParams *params)
{
Temp scratch = scratch_begin(&arena, 1);
+1 -1
View File
@@ -100,4 +100,4 @@ internal RDIM_Rng1U64ChunkList d2r_voff_ranges_from_cu_info_off(D2R_CompUnitCont
////////////////////////////////
//~ rjf: Main Conversion Entry Point
internal RDIM_BakeParams d2r_convert(Arena *arena, ASYNC_Root *async_root, D2R_ConvertParams *params);
internal RDIM_BakeParams d2r_convert(Arena *arena, D2R_ConvertParams *params);
+17 -56
View File
@@ -2,49 +2,8 @@
// Licensed under the MIT license (https://opensource.org/license/mit/)
internal RDIM_BakeParams
p2r2_convert(Arena **thread_arenas, U64 thread_count, P2R_ConvertParams *in)
p2r2_convert(Arena *arena, P2R_ConvertParams *params)
{
RDIM_BakeParams result = {0};
Temp scratch = scratch_begin(thread_arenas, thread_count);
Barrier barrier = barrier_alloc(thread_count);
{
P2R2_ConvertThreadParams *thread_params = push_array(scratch.arena, P2R2_ConvertThreadParams, thread_count);
OS_Handle *threads = push_array(scratch.arena, OS_Handle, thread_count);
for EachIndex(idx, thread_count)
{
thread_params[idx].arena = thread_arenas[idx];
thread_params[idx].lane_ctx.lane_idx = idx;
thread_params[idx].lane_ctx.lane_count = thread_count;
thread_params[idx].lane_ctx.barrier = barrier;
thread_params[idx].input_exe_name = in->input_exe_name;
thread_params[idx].input_exe_data = in->input_exe_data;
thread_params[idx].input_pdb_name = in->input_pdb_name;
thread_params[idx].input_pdb_data = in->input_pdb_data;
thread_params[idx].deterministic = in->deterministic;
thread_params[idx].out_bake_params = &result;
}
for EachIndex(idx, thread_count)
{
threads[idx] = os_thread_launch(p2r2_convert_thread_entry_point, &thread_params[idx], 0);
}
for EachIndex(idx, thread_count)
{
os_thread_join(threads[idx], max_U64);
}
}
barrier_release(barrier);
scratch_end(scratch);
return result;
}
internal void
p2r2_convert_thread_entry_point(void *p)
{
P2R2_ConvertThreadParams *params = (P2R2_ConvertThreadParams *)p;
Arena *arena = params->arena;
lane_ctx(params->lane_ctx);
ThreadNameF("p2r2_convert_thread_%I64u", lane_idx());
//////////////////////////////////////////////////////////////
//- rjf: do base MSF parse
//
@@ -3957,7 +3916,7 @@ p2r2_convert_thread_entry_point(void *p)
//////////////////////////////////////////////////////////////
//- rjf: bundle all outputs
//
if(lane_idx() == 0)
RDIM_BakeParams result = {0};
{
//- rjf: produce top-level-info
RDIM_TopLevelInfo top_level_info = {0};
@@ -3993,18 +3952,20 @@ p2r2_convert_thread_entry_point(void *p)
}
//- rjf: fill
params->out_bake_params->top_level_info = top_level_info;
params->out_bake_params->binary_sections = binary_sections;
params->out_bake_params->units = all_units;
params->out_bake_params->types = all_types;
params->out_bake_params->udts = all_udts;
params->out_bake_params->src_files = all_src_files;
params->out_bake_params->line_tables = all_line_tables;
params->out_bake_params->global_variables = all_global_variables;
params->out_bake_params->thread_variables = all_thread_variables;
params->out_bake_params->constants = all_constants;
params->out_bake_params->procedures = all_procedures;
params->out_bake_params->scopes = all_scopes;
params->out_bake_params->inline_sites = all_inline_sites;
result.top_level_info = top_level_info;
result.binary_sections = binary_sections;
result.units = all_units;
result.types = all_types;
result.udts = all_udts;
result.src_files = all_src_files;
result.line_tables = all_line_tables;
result.global_variables = all_global_variables;
result.thread_variables = all_thread_variables;
result.constants = all_constants;
result.procedures = all_procedures;
result.scopes = all_scopes;
result.inline_sites = all_inline_sites;
}
return result;
}
+1 -2
View File
@@ -133,7 +133,6 @@ struct P2R2_Shared
global P2R2_Shared *p2r2_shared = 0;
internal RDIM_BakeParams p2r2_convert(Arena **thread_arenas, U64 thread_count, P2R_ConvertParams *in);
internal void p2r2_convert_thread_entry_point(void *p);
internal RDIM_BakeParams p2r2_convert(Arena *arena, P2R_ConvertParams *params);
#endif // RDI_FROM_PDB_2_H