From 56bc399fd5ce533e852db8560e7fa46805b75770 Mon Sep 17 00:00:00 2001 From: Ryan Fleury Date: Wed, 10 Apr 2024 09:33:55 -0700 Subject: [PATCH] initial pass for compression in converter; decompression in debuggerdbgi layer --- src/base/base_core.h | 1 + src/dbgi/dbgi.c | 80 +- src/lib_raddbgi_format/raddbgi_format.h | 3 +- src/lib_raddbgi_format/raddbgi_format_parse.c | 20 +- src/lib_raddbgi_make/raddbgi_make.c | 86 +- src/lib_raddbgi_make/raddbgi_make.h | 6 +- src/raddbg/raddbg_main.cpp | 13 +- src/raddbgi_from_pdb/raddbgi_from_pdb.c | 98 ++ src/raddbgi_from_pdb/raddbgi_from_pdb.h | 30 +- src/raddbgi_from_pdb/raddbgi_from_pdb_main.c | 11 +- .../rad_lzb_simple/rad_lzb_simple.c | 1402 +++++++++++++++++ .../rad_lzb_simple/rad_lzb_simple.h | 141 ++ 12 files changed, 1824 insertions(+), 67 deletions(-) create mode 100644 src/third_party/rad_lzb_simple/rad_lzb_simple.c create mode 100644 src/third_party/rad_lzb_simple/rad_lzb_simple.h diff --git a/src/base/base_core.h b/src/base/base_core.h index 87add9df..7e8a6be5 100644 --- a/src/base/base_core.h +++ b/src/base/base_core.h @@ -168,6 +168,7 @@ # define ins_atomic_u64_dec_eval(x) InterlockedDecrement64((volatile __int64 *)(x)) # define ins_atomic_u64_eval_assign(x,c) InterlockedExchange64((volatile __int64 *)(x),(c)) # define ins_atomic_u64_add_eval(x,c) InterlockedAdd64((volatile __int64 *)(x), c) +# define ins_atomic_u64_eval_cond_assign(x,k,c) InterlockedCompareExchange64((volatile __int64 *)(x),(k),(c)) # define ins_atomic_u32_eval(x,c) InterlockedAdd((volatile LONG *)(x), 0) # define ins_atomic_u32_eval_assign(x,c) InterlockedExchange((volatile LONG *)(x),(c)) # define ins_atomic_u32_eval_cond_assign(x,k,c) InterlockedCompareExchange((volatile LONG *)(x),(k),(c)) diff --git a/src/dbgi/dbgi.c b/src/dbgi/dbgi.c index de5d7143..1a341c20 100644 --- a/src/dbgi/dbgi.c +++ b/src/dbgi/dbgi.c @@ -859,6 +859,13 @@ dbgi_parse_thread_entry_point(void *p) os_file_close(file); } + //- rjf: heuristically choose compression settings + B32 should_compress = 0; + if(og_dbg_props.size > MB(64)) + { + should_compress = 1; + } + //- rjf: raddbg file not up-to-date? we need to generate it if(do_task) { @@ -883,6 +890,10 @@ dbgi_parse_thread_entry_point(void *p) str8_list_pushf(scratch.arena, &opts.cmd_line, "raddbg"); str8_list_pushf(scratch.arena, &opts.cmd_line, "--convert"); str8_list_pushf(scratch.arena, &opts.cmd_line, "--quiet"); + if(should_compress) + { + str8_list_pushf(scratch.arena, &opts.cmd_line, "--compress"); + } //str8_list_pushf(scratch.arena, &opts.cmd_line, "--capture"); str8_list_pushf(scratch.arena, &opts.cmd_line, "--exe:%S", exe_path); str8_list_pushf(scratch.arena, &opts.cmd_line, "--pdb:%S", og_dbg_path); @@ -1020,15 +1031,72 @@ dbgi_parse_thread_entry_point(void *p) do_task = 0; } - //- rjf: parse raddbg info - RDI_Parsed rdi_parsed = dbgi_parse_nil.rdi; - U64 arch_addr_size = 8; + //- rjf: initial parse of raddbg info + RDI_Parsed rdi_parsed_maybe_compressed = dbgi_parse_nil.rdi; if(do_task) { - RDI_ParseStatus parse_status = rdi_parse((U8 *)raddbgi_file_base, raddbgi_file_props.size, &rdi_parsed); - if(rdi_parsed.top_level_info != 0) + RDI_ParseStatus parse_status = rdi_parse((U8 *)raddbgi_file_base, raddbgi_file_props.size, &rdi_parsed_maybe_compressed); + (void)parse_status; + } + + //- rjf: decompress, if necessary + RDI_Parsed rdi_parsed = rdi_parsed_maybe_compressed; + if(do_task) + { + U64 decompressed_size = raddbgi_file_props.size; + for(U64 dsec_idx = 0; dsec_idx < rdi_parsed_maybe_compressed.dsec_count; dsec_idx += 1) { - arch_addr_size = rdi_addr_size_from_arch(rdi_parsed.top_level_info->architecture); + decompressed_size += (rdi_parsed_maybe_compressed.dsecs[dsec_idx].unpacked_size - rdi_parsed_maybe_compressed.dsecs[dsec_idx].encoded_size); + } + if(decompressed_size > raddbgi_file_props.size) + { + U8 *decompressed_data = push_array_no_zero(parse_arena, U8, decompressed_size); + + // rjf: copy header + RDI_Header *src_header = (RDI_Header *)raddbgi_file_base; + RDI_Header *dst_header = (RDI_Header *)decompressed_data; + { + MemoryCopy(dst_header, src_header, sizeof(RDI_Header)); + } + + // rjf: copy & adjust sections for decompressed version + if(rdi_parsed_maybe_compressed.dsec_count != 0) + { + RDI_DataSection *dsec_base = (RDI_DataSection *)(decompressed_data + dst_header->data_section_off); + MemoryCopy(dsec_base, (U8 *)raddbgi_file_base + src_header->data_section_off, sizeof(RDI_DataSection) * rdi_parsed_maybe_compressed.dsec_count); + U64 off = dst_header->data_section_off + sizeof(RDI_DataSection) * rdi_parsed_maybe_compressed.dsec_count; + off += 7; + off -= off%8; + for(U64 idx = 0; idx < rdi_parsed_maybe_compressed.dsec_count; idx += 1) + { + dsec_base[idx].encoding = RDI_DataSectionEncoding_Unpacked; + dsec_base[idx].off = off; + dsec_base[idx].encoded_size = dsec_base[idx].unpacked_size; + off += dsec_base[idx].unpacked_size; + off += 7; + off -= off%8; + } + } + + // rjf: decompress sections into new decompressed file buffer + if(rdi_parsed_maybe_compressed.dsec_count != 0) + { + RDI_DataSection *src_first = rdi_parsed_maybe_compressed.dsecs; + RDI_DataSection *dst_first = (RDI_DataSection *)(decompressed_data + dst_header->data_section_off); + RDI_DataSection *src_opl = src_first + rdi_parsed_maybe_compressed.dsec_count; + RDI_DataSection *dst_opl = dst_first + rdi_parsed_maybe_compressed.dsec_count; + for(RDI_DataSection *src = src_first, *dst = dst_first; + src < src_opl && dst < dst_opl; + src += 1, dst += 1) + { + rr_lzb_simple_decode((U8*)raddbgi_file_base + src->off, src->encoded_size, + decompressed_data + dst->off, dst->unpacked_size); + } + } + + // rjf: re-parse + RDI_ParseStatus parse_status = rdi_parse(decompressed_data, decompressed_size, &rdi_parsed); + (void)parse_status; } } diff --git a/src/lib_raddbgi_format/raddbgi_format.h b/src/lib_raddbgi_format/raddbgi_format.h index 16a03d44..f0bc7b75 100644 --- a/src/lib_raddbgi_format/raddbgi_format.h +++ b/src/lib_raddbgi_format/raddbgi_format.h @@ -300,7 +300,8 @@ typedef enum RDI_DataSectionTagEnum{ #define RDI_DataSectionEncodingXList(X) \ -X(Unpacked, 0) +X(Unpacked, 0)\ +X(LZB, 1) typedef RDI_U32 RDI_DataSectionEncoding; typedef enum RDI_DataSectionEncodingEnum{ diff --git a/src/lib_raddbgi_format/raddbgi_format_parse.c b/src/lib_raddbgi_format/raddbgi_format_parse.c index 4f6e0b4e..3b3c2f61 100644 --- a/src/lib_raddbgi_format/raddbgi_format_parse.c +++ b/src/lib_raddbgi_format/raddbgi_format_parse.c @@ -537,23 +537,23 @@ rdi_first_voff_from_proc(RDI_Parsed *p, RDI_U32 proc_id){ RDI_PROC void* rdi_data_from_dsec(RDI_Parsed *parsed, RDI_U32 idx, RDI_U32 item_size, RDI_DataSectionTag expected_tag, - RDI_U64 *count_out){ + RDI_U64 *count_out) +{ void *result = 0; RDI_U32 count_result = 0; - - // TODO(allen): need a version of this that works with encodings other than "Unpacked" - - if (0 < idx && idx < parsed->dsec_count){ + if(0 < idx && idx < parsed->dsec_count) + { RDI_DataSection *ds = parsed->dsecs + idx; - if (ds->tag == expected_tag){ - RDI_U64 opl = ds->off + ds->encoded_size; - if (opl <= parsed->raw_data_size){ - count_result = ds->encoded_size/item_size; + if(ds->tag == expected_tag) + { + RDI_U64 encoded_opl = ds->off + ds->encoded_size; + if(encoded_opl <= parsed->raw_data_size) + { + count_result = ds->unpacked_size/item_size; result = (parsed->raw_data + ds->off); } } } - *count_out = count_result; return(result); } diff --git a/src/lib_raddbgi_make/raddbgi_make.c b/src/lib_raddbgi_make/raddbgi_make.c index 11b41166..9165fdae 100644 --- a/src/lib_raddbgi_make/raddbgi_make.c +++ b/src/lib_raddbgi_make/raddbgi_make.c @@ -1744,11 +1744,13 @@ rdim_bake_section_list_push(RDIM_Arena *arena, RDIM_BakeSectionList *list) } RDI_PROC RDIM_BakeSection * -rdim_bake_section_list_push_new(RDIM_Arena *arena, RDIM_BakeSectionList *list, void *data, RDI_U64 size, RDI_DataSectionTag tag, RDI_U64 tag_idx) +rdim_bake_section_list_push_new_unpacked(RDIM_Arena *arena, RDIM_BakeSectionList *list, void *data, RDI_U64 size, RDI_DataSectionTag tag, RDI_U64 tag_idx) { RDIM_BakeSection *section = rdim_bake_section_list_push(arena, list); section->data = data; - section->size = size; + section->encoding = RDI_DataSectionEncoding_Unpacked; + section->encoded_size = size; + section->unpacked_size = size; section->tag = tag; section->tag_idx = tag_idx; return section; @@ -2142,7 +2144,7 @@ rdim_bake_top_level_info_section_list_from_params(RDIM_Arena *arena, RDIM_BakeSt dst_tli->exe_name_string_idx = rdim_bake_idx_from_string(strings, src_tli->exe_name); dst_tli->exe_hash = src_tli->exe_hash; dst_tli->voff_max = src_tli->voff_max; - rdim_bake_section_list_push_new(arena, §ions, dst_tli, sizeof(*dst_tli), RDI_DataSectionTag_TopLevelInfo, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, dst_tli, sizeof(*dst_tli), RDI_DataSectionTag_TopLevelInfo, 0); return sections; } @@ -2166,7 +2168,7 @@ rdim_bake_binary_section_section_list_from_params(RDIM_Arena *arena, RDIM_BakeSt dst->foff_first = src->foff_first; dst->foff_opl = src->foff_opl; } - rdim_bake_section_list_push_new(arena, §ions, dst_base, sizeof(*dst_base)*dst_idx, RDI_DataSectionTag_BinarySections, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, dst_base, sizeof(*dst_base)*dst_idx, RDI_DataSectionTag_BinarySections, 0); return sections; } @@ -2286,11 +2288,11 @@ rdim_bake_section_list_from_unit(RDIM_Arena *arena, RDIM_Unit *unit) //- rjf: build line info sections // U64 unit_idx = rdim_idx_from_unit(unit); - rdim_bake_section_list_push_new(arena, §ions, unit_voffs, sizeof(RDI_U64)*(unit_line_count+1), RDI_DataSectionTag_LineInfoVoffs, unit_idx); - rdim_bake_section_list_push_new(arena, §ions, unit_lines, sizeof(RDI_Line)*unit_line_count, RDI_DataSectionTag_LineInfoData, unit_idx); + rdim_bake_section_list_push_new_unpacked(arena, §ions, unit_voffs, sizeof(RDI_U64)*(unit_line_count+1), RDI_DataSectionTag_LineInfoVoffs, unit_idx); + rdim_bake_section_list_push_new_unpacked(arena, §ions, unit_lines, sizeof(RDI_Line)*unit_line_count, RDI_DataSectionTag_LineInfoData, unit_idx); if(unit_cols != 0) { - rdim_bake_section_list_push_new(arena, §ions, unit_cols, sizeof(RDI_Column)*unit_line_count, RDI_DataSectionTag_LineInfoColumns, unit_idx); + rdim_bake_section_list_push_new_unpacked(arena, §ions, unit_cols, sizeof(RDI_Column)*unit_line_count, RDI_DataSectionTag_LineInfoColumns, unit_idx); } return sections; @@ -2417,11 +2419,11 @@ rdim_bake_unit_top_level_section_list_from_params(RDIM_Arena *arena, RDIM_BakeSt //////////////////////// //- rjf: build line info sections // - rdim_bake_section_list_push_new(arena, §ions, unit_voffs, sizeof(RDI_U64)*(unit_line_count+1), RDI_DataSectionTag_LineInfoVoffs, dst_idx); - rdim_bake_section_list_push_new(arena, §ions, unit_lines, sizeof(RDI_Line)*unit_line_count, RDI_DataSectionTag_LineInfoData, dst_idx); + rdim_bake_section_list_push_new_unpacked(arena, §ions, unit_voffs, sizeof(RDI_U64)*(unit_line_count+1), RDI_DataSectionTag_LineInfoVoffs, dst_idx); + rdim_bake_section_list_push_new_unpacked(arena, §ions, unit_lines, sizeof(RDI_Line)*unit_line_count, RDI_DataSectionTag_LineInfoData, dst_idx); if(unit_cols != 0) { - rdim_bake_section_list_push_new(arena, §ions, unit_cols, sizeof(RDI_Column)*unit_line_count, RDI_DataSectionTag_LineInfoColumns, dst_idx); + rdim_bake_section_list_push_new_unpacked(arena, §ions, unit_cols, sizeof(RDI_Column)*unit_line_count, RDI_DataSectionTag_LineInfoColumns, dst_idx); } //////////////////////// @@ -2439,7 +2441,7 @@ rdim_bake_unit_top_level_section_list_from_params(RDIM_Arena *arena, RDIM_BakeSt dst->line_info_col_data_idx = (RDI_U32)rdim_bake_section_idx_from_params_tag_idx(params, RDI_DataSectionTag_LineInfoColumns, dst_idx); // TODO(rjf): @u64_to_u32 } } - rdim_bake_section_list_push_new(arena, §ions, dst_base, sizeof(*dst_base)*dst_idx, RDI_DataSectionTag_Units, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, dst_base, sizeof(*dst_base)*dst_idx, RDI_DataSectionTag_Units, 0); return sections; } @@ -2514,7 +2516,7 @@ rdim_bake_unit_vmap_section_list_from_params(RDIM_Arena *arena, RDIM_BakeParams //- rjf: build section RDIM_BakeSectionList sections = {0}; RDI_U64 unit_vmap_size = sizeof(unit_vmap.vmap[0])*(unit_vmap.count+1); - rdim_bake_section_list_push_new(arena, §ions, unit_vmap.vmap, unit_vmap_size, RDI_DataSectionTag_UnitVmap, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, unit_vmap.vmap, unit_vmap_size, RDI_DataSectionTag_UnitVmap, 0); return sections; } @@ -2689,9 +2691,9 @@ rdim_bake_src_file_section_list_from_params(RDIM_Arena *arena, RDIM_BakeStringMa dst_file->line_map_nums_data_idx = (RDI_U32)rdim_bake_section_idx_from_params_tag_idx(params, RDI_DataSectionTag_LineMapNumbers, dst_file_idx); // TODO(rjf): @u64_to_u32 dst_file->line_map_range_data_idx = (RDI_U32)rdim_bake_section_idx_from_params_tag_idx(params, RDI_DataSectionTag_LineMapRanges, dst_file_idx); // TODO(rjf): @u64_to_u32 dst_file->line_map_voff_data_idx = (RDI_U32)rdim_bake_section_idx_from_params_tag_idx(params, RDI_DataSectionTag_LineMapVoffs, dst_file_idx); // TODO(rjf): @u64_to_u32 - rdim_bake_section_list_push_new(arena, §ions, src_file_line_nums, sizeof(*src_file_line_nums)*src_file_line_count, RDI_DataSectionTag_LineMapNumbers, dst_file_idx); - rdim_bake_section_list_push_new(arena, §ions, src_file_line_ranges, sizeof(*src_file_line_ranges)*(src_file_line_count + 1), RDI_DataSectionTag_LineMapRanges, dst_file_idx); - rdim_bake_section_list_push_new(arena, §ions, src_file_voffs, sizeof(*src_file_voffs)*src_file_voff_count, RDI_DataSectionTag_LineMapVoffs, dst_file_idx); + rdim_bake_section_list_push_new_unpacked(arena, §ions, src_file_line_nums, sizeof(*src_file_line_nums)*src_file_line_count, RDI_DataSectionTag_LineMapNumbers, dst_file_idx); + rdim_bake_section_list_push_new_unpacked(arena, §ions, src_file_line_ranges, sizeof(*src_file_line_ranges)*(src_file_line_count + 1), RDI_DataSectionTag_LineMapRanges, dst_file_idx); + rdim_bake_section_list_push_new_unpacked(arena, §ions, src_file_voffs, sizeof(*src_file_voffs)*src_file_voff_count, RDI_DataSectionTag_LineMapVoffs, dst_file_idx); } } } @@ -2699,7 +2701,7 @@ rdim_bake_src_file_section_list_from_params(RDIM_Arena *arena, RDIM_BakeStringMa //////////////////////////// //- rjf: build section for all source files // - rdim_bake_section_list_push_new(arena, §ions, dst_files, sizeof(RDI_SourceFile)*dst_files_count, RDI_DataSectionTag_SourceFiles, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, dst_files, sizeof(RDI_SourceFile)*dst_files_count, RDI_DataSectionTag_SourceFiles, 0); return sections; } @@ -2775,7 +2777,7 @@ rdim_bake_type_node_section_list_from_params(RDIM_Arena *arena, RDIM_BakeStringM //- rjf: build sections RDIM_BakeSectionList sections = {0}; - rdim_bake_section_list_push_new(arena, §ions, type_nodes, sizeof(RDI_TypeNode)*(params->types.total_count+1), RDI_DataSectionTag_TypeNodes, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, type_nodes, sizeof(RDI_TypeNode)*(params->types.total_count+1), RDI_DataSectionTag_TypeNodes, 0); return sections; } @@ -2843,9 +2845,9 @@ rdim_bake_udt_section_list_from_params(RDIM_Arena *arena, RDIM_BakeStringMapTigh //- rjf: build sections RDIM_BakeSectionList sections = {0}; - rdim_bake_section_list_push_new(arena, §ions, udts, sizeof(RDI_UDT) * (params->udts.total_count+1), RDI_DataSectionTag_UDTs, 0); - rdim_bake_section_list_push_new(arena, §ions, members , sizeof(RDI_Member) * (params->udts.total_member_count+1), RDI_DataSectionTag_Members, 0); - rdim_bake_section_list_push_new(arena, §ions, enum_members, sizeof(RDI_EnumMember) * (params->udts.total_enum_val_count+1), RDI_DataSectionTag_EnumMembers, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, udts, sizeof(RDI_UDT) * (params->udts.total_count+1), RDI_DataSectionTag_UDTs, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, members , sizeof(RDI_Member) * (params->udts.total_member_count+1), RDI_DataSectionTag_Members, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, enum_members, sizeof(RDI_EnumMember) * (params->udts.total_enum_val_count+1), RDI_DataSectionTag_EnumMembers, 0); return sections; } @@ -2887,7 +2889,7 @@ rdim_bake_global_variable_section_list_from_params(RDIM_Arena *arena, RDIM_BakeS //- rjf: build sections RDIM_BakeSectionList sections = {0}; - rdim_bake_section_list_push_new(arena, §ions, global_variables, sizeof(RDI_GlobalVariable)*(params->global_variables.total_count+1), RDI_DataSectionTag_GlobalVariables, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, global_variables, sizeof(RDI_GlobalVariable)*(params->global_variables.total_count+1), RDI_DataSectionTag_GlobalVariables, 0); return sections; } @@ -2966,7 +2968,7 @@ rdim_bake_global_vmap_section_list_from_params(RDIM_Arena *arena, RDIM_BakeParam //- rjf: build sections RDIM_BakeSectionList sections = {0}; - rdim_bake_section_list_push_new(arena, §ions, global_vmap.vmap, sizeof(RDI_VMapEntry)*(global_vmap.count+1), RDI_DataSectionTag_GlobalVmap, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, global_vmap.vmap, sizeof(RDI_VMapEntry)*(global_vmap.count+1), RDI_DataSectionTag_GlobalVmap, 0); return sections; } @@ -3008,7 +3010,7 @@ rdim_bake_thread_variable_section_list_from_params(RDIM_Arena *arena, RDIM_BakeS //- rjf: build sections RDIM_BakeSectionList sections = {0}; - rdim_bake_section_list_push_new(arena, §ions, thread_variables, sizeof(RDI_ThreadVariable)*(params->thread_variables.total_count+1), RDI_DataSectionTag_ThreadVariables, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, thread_variables, sizeof(RDI_ThreadVariable)*(params->thread_variables.total_count+1), RDI_DataSectionTag_ThreadVariables, 0); return sections; } @@ -3051,7 +3053,7 @@ rdim_bake_procedure_section_list_from_params(RDIM_Arena *arena, RDIM_BakeStringM //- rjf: build sections RDIM_BakeSectionList sections = {0}; - rdim_bake_section_list_push_new(arena, §ions, procedures, sizeof(RDI_Procedure)*(params->procedures.total_count+1), RDI_DataSectionTag_Procedures, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, procedures, sizeof(RDI_Procedure)*(params->procedures.total_count+1), RDI_DataSectionTag_Procedures, 0); return sections; } @@ -3218,11 +3220,11 @@ rdim_bake_scope_section_list_from_params(RDIM_Arena *arena, RDIM_BakeStringMapTi RDIM_BakeSectionList sections = {0}; RDIM_ProfScope("push all symbol info sections") { - rdim_bake_section_list_push_new(arena, §ions, scopes, sizeof(RDI_Scope) * (params->scopes.total_count+1), RDI_DataSectionTag_Scopes, 0); - rdim_bake_section_list_push_new(arena, §ions, scope_voffs, sizeof(RDI_U64) * (params->scopes.scope_voff_count+1), RDI_DataSectionTag_ScopeVoffData, 0); - rdim_bake_section_list_push_new(arena, §ions, locals, sizeof(RDI_Local) * (params->scopes.local_count+1), RDI_DataSectionTag_Locals, 0); - rdim_bake_section_list_push_new(arena, §ions, location_blocks, sizeof(RDI_LocationBlock) * (params->scopes.location_count+1), RDI_DataSectionTag_LocationBlocks, 0); - rdim_bake_section_list_push_new(arena, §ions, location_data_blob.str, location_data_blob.size, RDI_DataSectionTag_LocationData, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, scopes, sizeof(RDI_Scope) * (params->scopes.total_count+1), RDI_DataSectionTag_Scopes, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, scope_voffs, sizeof(RDI_U64) * (params->scopes.scope_voff_count+1), RDI_DataSectionTag_ScopeVoffData, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, locals, sizeof(RDI_Local) * (params->scopes.local_count+1), RDI_DataSectionTag_Locals, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, location_blocks, sizeof(RDI_LocationBlock) * (params->scopes.location_count+1), RDI_DataSectionTag_LocationBlocks, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, location_data_blob.str, location_data_blob.size, RDI_DataSectionTag_LocationData, 0); } rdim_scratch_end(scratch); return sections; @@ -3280,7 +3282,7 @@ rdim_bake_scope_vmap_section_list_from_params(RDIM_Arena *arena, RDIM_BakeParams //- rjf: build sections RDIM_BakeSectionList sections = {0}; - rdim_bake_section_list_push_new(arena, §ions, scope_vmap.vmap, sizeof(RDI_VMapEntry)*(scope_vmap.count+1), RDI_DataSectionTag_ScopeVmap, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, scope_vmap.vmap, sizeof(RDI_VMapEntry)*(scope_vmap.count+1), RDI_DataSectionTag_ScopeVmap, 0); return sections; } @@ -3322,7 +3324,7 @@ rdim_bake_top_level_name_map_section_list_from_params_maps(RDIM_Arena *arena, RD } // rjf: push section for all name maps - rdim_bake_section_list_push_new(arena, §ions, dst_maps, sizeof(RDI_NameMap)*name_map_count, RDI_DataSectionTag_NameMaps, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, dst_maps, sizeof(RDI_NameMap)*name_map_count, RDI_DataSectionTag_NameMaps, 0); return sections; } @@ -3424,8 +3426,8 @@ rdim_bake_name_map_section_list_from_params_kind_map(RDIM_Arena *arena, RDIM_Bak } // rjf: sections for buckets/nodes - rdim_bake_section_list_push_new(arena, §ions, baked_buckets, sizeof(RDI_NameMapBucket)* baked_buckets_count, RDI_DataSectionTag_NameMapBuckets, (RDI_U64)k); - rdim_bake_section_list_push_new(arena, §ions, baked_nodes, sizeof(RDI_NameMapNode) * baked_nodes_count, RDI_DataSectionTag_NameMapNodes, (RDI_U64)k); + rdim_bake_section_list_push_new_unpacked(arena, §ions, baked_buckets, sizeof(RDI_NameMapBucket)* baked_buckets_count, RDI_DataSectionTag_NameMapBuckets, (RDI_U64)k); + rdim_bake_section_list_push_new_unpacked(arena, §ions, baked_nodes, sizeof(RDI_NameMapNode) * baked_nodes_count, RDI_DataSectionTag_NameMapNodes, (RDI_U64)k); } return sections; } @@ -3461,7 +3463,7 @@ rdim_bake_file_path_section_list_from_path_tree(RDIM_Arena *arena, RDIM_BakeStri } } RDIM_BakeSectionList sections = {0}; - rdim_bake_section_list_push_new(arena, §ions, dst_nodes, sizeof(RDI_FilePathNode)*dst_nodes_count, RDI_DataSectionTag_FilePathNodes, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, dst_nodes, sizeof(RDI_FilePathNode)*dst_nodes_count, RDI_DataSectionTag_FilePathNodes, 0); return sections; } @@ -3507,8 +3509,8 @@ rdim_bake_string_section_list_from_string_map(RDIM_Arena *arena, RDIM_BakeString } } } - rdim_bake_section_list_push_new(arena, §ions, str_offs, sizeof(RDI_U32)*(strings->total_count+1), RDI_DataSectionTag_StringTable, 0); - rdim_bake_section_list_push_new(arena, §ions, buf, off_cursor, RDI_DataSectionTag_StringData, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, str_offs, sizeof(RDI_U32)*(strings->total_count+1), RDI_DataSectionTag_StringTable, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, buf, off_cursor, RDI_DataSectionTag_StringData, 0); return sections; } @@ -3530,7 +3532,7 @@ rdim_bake_idx_run_section_list_from_idx_run_map(RDIM_Arena *arena, RDIM_BakeIdxR out_ptr += node->count; } } - rdim_bake_section_list_push_new(arena, §ions, idx_data, sizeof(RDI_U32)*idx_runs->idx_count, RDI_DataSectionTag_IndexRuns, 0); + rdim_bake_section_list_push_new_unpacked(arena, §ions, idx_data, sizeof(RDI_U32)*idx_runs->idx_count, RDI_DataSectionTag_IndexRuns, 0); return sections; } @@ -3591,17 +3593,17 @@ rdim_serialized_strings_from_params_bake_section_list(RDIM_Arena *arena, RDIM_Ba if(src == 0) { continue; } RDI_DataSection *dst = rdi_sections+dst_idx; U64 data_section_off = 0; - if(src->size != 0) + if(src->encoded_size != 0) { rdim_str8_list_push_align(arena, &strings, 8); data_section_off = strings.total_size; - rdim_str8_list_push(arena, &strings, rdim_str8((RDI_U8 *)src->data, src->size)); + rdim_str8_list_push(arena, &strings, rdim_str8((RDI_U8 *)src->data, src->encoded_size)); } dst->tag = src->tag; - dst->encoding = RDI_DataSectionEncoding_Unpacked; + dst->encoding = src->encoding; dst->off = data_section_off; - dst->encoded_size = src->size; - dst->unpacked_size = src->size; + dst->encoded_size = src->encoded_size; + dst->unpacked_size = src->unpacked_size; } rdim_scratch_end(scratch); diff --git a/src/lib_raddbgi_make/raddbgi_make.h b/src/lib_raddbgi_make/raddbgi_make.h index 3cb989f0..6c773aa4 100644 --- a/src/lib_raddbgi_make/raddbgi_make.h +++ b/src/lib_raddbgi_make/raddbgi_make.h @@ -811,7 +811,9 @@ typedef struct RDIM_BakeSection RDIM_BakeSection; struct RDIM_BakeSection { void *data; - RDI_U64 size; + RDI_DataSectionEncoding encoding; + RDI_U64 encoded_size; + RDI_U64 unpacked_size; RDI_DataSectionTag tag; RDI_U64 tag_idx; }; @@ -1153,7 +1155,7 @@ RDI_PROC void rdim_bake_name_map_push(RDIM_Arena *arena, RDIM_BakeNameMap *map, //~ rjf: [Baking Helpers] Data Section List Building Helpers RDI_PROC RDIM_BakeSection *rdim_bake_section_list_push(RDIM_Arena *arena, RDIM_BakeSectionList *list); -RDI_PROC RDIM_BakeSection *rdim_bake_section_list_push_new(RDIM_Arena *arena, RDIM_BakeSectionList *list, void *data, RDI_U64 size, RDI_DataSectionTag tag, RDI_U64 tag_idx); +RDI_PROC RDIM_BakeSection *rdim_bake_section_list_push_new_unpacked(RDIM_Arena *arena, RDIM_BakeSectionList *list, void *data, RDI_U64 size, RDI_DataSectionTag tag, RDI_U64 tag_idx); RDI_PROC void rdim_bake_section_list_concat_in_place(RDIM_BakeSectionList *dst, RDIM_BakeSectionList *to_push); //////////////////////////////// diff --git a/src/raddbg/raddbg_main.cpp b/src/raddbg/raddbg_main.cpp index 32b3f3df..c381c490 100644 --- a/src/raddbg/raddbg_main.cpp +++ b/src/raddbg/raddbg_main.cpp @@ -16,9 +16,11 @@ //- rjf: [lib] #include "lib_raddbgi_format/raddbgi_format.h" -#include "lib_raddbgi_format/raddbgi_format_parse.h" #include "lib_raddbgi_format/raddbgi_format.c" +#include "lib_raddbgi_format/raddbgi_format_parse.h" #include "lib_raddbgi_format/raddbgi_format_parse.c" +#include "third_party/rad_lzb_simple/rad_lzb_simple.h" +#include "third_party/rad_lzb_simple/rad_lzb_simple.c" //- rjf: [h] #include "base/base_inc.h" @@ -389,8 +391,15 @@ entry_point(CmdLine *cmd_line) bake2srlz = p2r_bake(scratch.arena, convert2bake); } + //- rjf: compress + P2R_Bake2Serialize *bake2srlz_compressed = bake2srlz; + if(cmd_line_has_flag(cmd_line, str8_lit("compress"))) ProfScope("compress") + { + bake2srlz_compressed = p2r_compress(scratch.arena, bake2srlz); + } + //- rjf: serialize - String8List serialize_out = rdim_serialized_strings_from_params_bake_section_list(scratch.arena, &convert2bake->bake_params, &bake2srlz->sections); + String8List serialize_out = rdim_serialized_strings_from_params_bake_section_list(scratch.arena, &convert2bake->bake_params, &bake2srlz_compressed->sections); //- rjf: write if(out_file_is_good) diff --git a/src/raddbgi_from_pdb/raddbgi_from_pdb.c b/src/raddbgi_from_pdb/raddbgi_from_pdb.c index 7679eb7a..26901abb 100644 --- a/src/raddbgi_from_pdb/raddbgi_from_pdb.c +++ b/src/raddbgi_from_pdb/raddbgi_from_pdb.c @@ -95,11 +95,54 @@ p2r_user2convert_from_cmdln(Arena *arena, CmdLine *cmdline) } } + //- rjf: define string -> flag bits +#define FlagNameMapXList \ +Case("sections", BinarySections)\ +Case("units", Units)\ +Case("procedures", Procedures)\ +Case("globals", GlobalVariables)\ +Case("threadvars", ThreadVariables)\ +Case("scopes", Scopes)\ +Case("locals", Locals)\ +Case("types", Types)\ +Case("udts", UDTs)\ +Case("lines", LineInfo)\ +Case("globals_name_map", GlobalVariableNameMap)\ +Case("threadvars_name_map", ThreadVariableNameMap)\ +Case("procedure_name_map", ProcedureNameMap)\ +Case("type_name_map", TypeNameMap)\ +Case("link_name_map", LinkNameProcedureNameMap)\ +Case("source_path_name_map",NormalSourcePathNameMap)\ + //- rjf: get flags { result->flags = P2R_ConvertFlag_All; + String8List only_names = cmd_line_strings(cmdline, str8_lit("only")); + String8List omit_names = cmd_line_strings(cmdline, str8_lit("only")); + if(only_names.node_count != 0) + { + result->flags = 0; + for(String8Node *n = only_names.first; n != 0; n = n->next) + { + String8 string = n->string; +#define Case(str, flag) if(str8_match(string, str8_lit(str), StringMatchFlag_CaseInsensitive)) {result->flags |= P2R_ConvertFlag_##flag;} + FlagNameMapXList; +#undef Case + } + } + if(omit_names.node_count != 0) + { + for(String8Node *n = omit_names.first; n != 0; n = n->next) + { + String8 string = n->string; +#define Case(str, flag) if(str8_match(string, str8_lit(str), StringMatchFlag_CaseInsensitive)) {result->flags &= ~P2R_ConvertFlag_##flag;} + FlagNameMapXList; +#undef Case + } + } } +#undef FlagNameMapXList return result; } @@ -4238,3 +4281,58 @@ p2r_bake(Arena *arena, P2R_Convert2Bake *in) scratch_end(scratch); return out; } + +//////////////////////////////// +//~ rjf: Top-Level Compression Entry Point + +internal P2R_Bake2Serialize * +p2r_compress(Arena *arena, P2R_Bake2Serialize *in) +{ + RDIM_BakeSectionList prepack_sections = in->sections; + RDIM_BakeSectionList postpack_sections = {0}; + { + //- rjf: set up compression context + rr_lzb_simple_context ctx = {0}; + ctx.m_tableSizeBits = 14; + ctx.m_hashTable = push_array(arena, U16, 1<next) + { + RDIM_BakeSection *src = &src_n->v; + + // rjf: push new section + RDIM_BakeSection *dst = rdim_bake_section_list_push(arena, &postpack_sections); + + // rjf: unpack uncompressed section info + void *data = src->data; + RDI_DataSectionEncoding encoding = src->encoding; + RDI_U64 encoded_size = src->encoded_size; + RDI_U64 unpacked_size = src->unpacked_size; + + // rjf: determine if this section should be compressed + B32 should_compress = 1; + + // rjf: compress if needed + if(should_compress) + { + MemoryZero(ctx.m_hashTable, sizeof(U16)*(1<data = data; + dst->encoding = encoding; + dst->encoded_size = encoded_size; + dst->unpacked_size = unpacked_size; + dst->tag = src->tag; + dst->tag_idx = src->tag_idx; + } + } + P2R_Bake2Serialize *out = push_array(arena, P2R_Bake2Serialize, 1); + out->sections = postpack_sections; + return out; +} diff --git a/src/raddbgi_from_pdb/raddbgi_from_pdb.h b/src/raddbgi_from_pdb/raddbgi_from_pdb.h index 7b1ae67e..226c0bde 100644 --- a/src/raddbgi_from_pdb/raddbgi_from_pdb.h +++ b/src/raddbgi_from_pdb/raddbgi_from_pdb.h @@ -5,16 +5,35 @@ #define RADDBGI_FROM_PDB_H //////////////////////////////// -//~ rjf: Conversion Stage Inputs/Outputs +//~ rjf: Export Artifact Flags typedef U32 P2R_ConvertFlags; enum { - P2R_ConvertFlag_Types = (1<<0), - P2R_ConvertFlag_UDTs = (1<<1), + P2R_ConvertFlag_Strings = (1<<0), + P2R_ConvertFlag_IndexRuns = (1<<1), + P2R_ConvertFlag_BinarySections = (1<<2), + P2R_ConvertFlag_Units = (1<<3), + P2R_ConvertFlag_Procedures = (1<<4), + P2R_ConvertFlag_GlobalVariables = (1<<5), + P2R_ConvertFlag_ThreadVariables = (1<<6), + P2R_ConvertFlag_Scopes = (1<<7), + P2R_ConvertFlag_Locals = (1<<8), + P2R_ConvertFlag_Types = (1<<9), + P2R_ConvertFlag_UDTs = (1<<10), + P2R_ConvertFlag_LineInfo = (1<<11), + P2R_ConvertFlag_GlobalVariableNameMap = (1<<12), + P2R_ConvertFlag_ThreadVariableNameMap = (1<<13), + P2R_ConvertFlag_ProcedureNameMap = (1<<14), + P2R_ConvertFlag_TypeNameMap = (1<<15), + P2R_ConvertFlag_LinkNameProcedureNameMap= (1<<16), + P2R_ConvertFlag_NormalSourcePathNameMap = (1<<17), P2R_ConvertFlag_All = 0xffffffff, }; +//////////////////////////////// +//~ rjf: Conversion Stage Inputs/Outputs + typedef struct P2R_User2Convert P2R_User2Convert; struct P2R_User2Convert { @@ -602,4 +621,9 @@ internal TS_TASK_FUNCTION_DEF(p2r_bake_idx_runs_task__entry_point); internal P2R_Bake2Serialize *p2r_bake(Arena *arena, P2R_Convert2Bake *in); +//////////////////////////////// +//~ rjf: Top-Level Compression Entry Point + +internal P2R_Bake2Serialize *p2r_compress(Arena *arena, P2R_Bake2Serialize *in); + #endif // RADDBGI_FROM_PDB_H diff --git a/src/raddbgi_from_pdb/raddbgi_from_pdb_main.c b/src/raddbgi_from_pdb/raddbgi_from_pdb_main.c index bc0ef474..e323bc09 100644 --- a/src/raddbgi_from_pdb/raddbgi_from_pdb_main.c +++ b/src/raddbgi_from_pdb/raddbgi_from_pdb_main.c @@ -17,6 +17,8 @@ //- rjf: [lib] #include "lib_raddbgi_format/raddbgi_format.h" #include "lib_raddbgi_format/raddbgi_format.c" +#include "third_party/rad_lzb_simple/rad_lzb_simple.h" +#include "third_party/rad_lzb_simple/rad_lzb_simple.c" //- rjf: [h] #include "base/base_inc.h" @@ -96,8 +98,15 @@ entry_point(CmdLine *cmdline) bake2srlz = p2r_bake(arena, convert2bake); } + //- rjf: compress + P2R_Bake2Serialize *bake2srlz_compressed = bake2srlz; + if(cmd_line_has_flag(cmdline, str8_lit("compress"))) ProfScope("compress") + { + bake2srlz_compressed = p2r_compress(arena, bake2srlz); + } + //- rjf: serialize - String8List serialize_out = rdim_serialized_strings_from_params_bake_section_list(arena, &convert2bake->bake_params, &bake2srlz->sections); + String8List serialize_out = rdim_serialized_strings_from_params_bake_section_list(arena, &convert2bake->bake_params, &bake2srlz_compressed->sections); //- rjf: write ProfScope("write") diff --git a/src/third_party/rad_lzb_simple/rad_lzb_simple.c b/src/third_party/rad_lzb_simple/rad_lzb_simple.c new file mode 100644 index 00000000..3e6e3516 --- /dev/null +++ b/src/third_party/rad_lzb_simple/rad_lzb_simple.c @@ -0,0 +1,1402 @@ +#include + +//------------------------------------------------- +// UINTr = int the size of a register + +#ifdef __RAD64REGS__ + +#define RAD_UINTr RAD_U64 +#define RAD_SINTr RAD_S64 + +#define readR read64 +#define writeR write64 + +#define rrClzBytesR rrClzBytes64 +#define rrCtzBytesR rrCtzBytes64 + +#else + +#define RAD_UINTr RAD_U32 +#define RAD_SINTr RAD_S32 + +#define readR read32 +#define writeR write32 + +#define rrClzBytesR rrClzBytes32 +#define rrCtzBytesR rrCtzBytes32 + +#endif + +typedef RAD_SINTr SINTr; +typedef RAD_UINTr UINTr; + +#define OOINLINE RADFORCEINLINE + +#define if_unlikely(exp) if ( RAD_UNLIKELY( exp ) ) +#define if_likely( exp) if ( RAD_LIKELY( exp ) ) + +// Raw byte IO + +#if defined(__RADARM__) && !defined(__RAD64__) && defined(__GNUC__) + +// older GCCs don't turn the memcpy variant into loads/stores, but +// they do support this: +typedef union +{ + U16 u16; + U32 u32; + U64 u64; +} __attribute__((packed)) unaligned_type; + +static inline U16 read16(const void *ptr) { return ((const unaligned_type *)ptr)->u16; } +static inline void write16(void *ptr, U16 x) { ((unaligned_type *)ptr)->u16 = x; } + +static inline U32 read32(const void *ptr) { return ((const unaligned_type *)ptr)->u32; } +static inline void write32(void *ptr, U32 x) { ((unaligned_type *)ptr)->u32 = x; } + +static inline U64 read64(const void *ptr) { return ((const unaligned_type *)ptr)->u64; } +static inline void write64(void *ptr, U64 x) { ((unaligned_type *)ptr)->u64 = x; } + +#else + +// most C compilers we target are smart enough to turn this into single loads/stores +static inline U16 read16(const void *ptr) { U16 x; memcpy(&x, ptr, sizeof(x)); return x; } +static inline void write16(void *ptr, U16 x) { memcpy(ptr, &x, sizeof(x)); } + +static inline U32 read32(const void *ptr) { U32 x; memcpy(&x, ptr, sizeof(x)); return x; } +static inline void write32(void *ptr, U32 x) { memcpy(ptr, &x, sizeof(x)); } + +static inline U64 read64(const void *ptr) { U64 x; memcpy(&x, ptr, sizeof(x)); return x; } +static inline void write64(void *ptr, U64 x) { memcpy(ptr, &x, sizeof(x)); } + +#endif + +#define RR_PUT16_LE_UNALIGNED(ptr,val) RR_PUT16_LE(ptr,val) +#define RR_PUT16_LE_UNALIGNED_OFFSET(ptr,val,offset) RR_PUT16_LE_OFFSET(ptr,val,offset) + +//=========================================================================== + +static RADINLINE SINTa rrPtrDiffV(void * end, void *start) { return (SINTa)( ((char *)(end)) - ((char *)(start)) ); } + +// helper function to show I really am intending to put a pointer difference in an int : +static RADINLINE SINTa rrPtrDiff(SINTa val) { return val; } +static RADINLINE S32 rrPtrDiff32(SINTa val) { S32 ret = (S32) val; RR_ASSERT( (SINTa)ret == val ); return ret; } +static RADINLINE SINTr rrPtrDiffR(SINTa val) { SINTr ret = (SINTr) val; RR_ASSERT( (SINTa)ret == val ); return ret; } + +//================================================================= + +#define LZB_LRL_BITS 4 +#define LZB_LRL_ESCAPE 15 + +#define LZB_ML_BITS 4 +#define LZB_MLCONTROL_ESCAPE 15 + +#define LZB_SLIDING_WINDOW_POW2 16 +#define LZB_SLIDING_WINDOW_SIZE (1<>= 6; \ +if ( val < 128 ) *cp++ = (U8) val; \ +else { val -= 128; *cp++ = 128 + (U8) ( val&0x7F); val >>= 7; \ +if ( val < 128 ) *cp++ = (U8) val; \ +else { val -= 128; *cp++ = 128 + (U8) ( val&0x7F); val >>= 7; \ +if ( val < 128 ) *cp++ = (U8) val; \ +else { val -= 128; *cp++ = 128 + (U8) ( val&0x7F); val >>= 7; *cp++ = (U8) val; } } } } \ +} while(0) + +// max bytes consumed: 5 +#define LZB_AddExcessBW(cp,val) do { U32 b = *cp++; \ +if ( b < 192 ) val += b; \ +else { val += 192; val += (b-192); b = *cp++; \ +val += (b<<6); if ( b >= 128 ) { b = *cp++; \ +val += (b<<13); if ( b >= 128 ) { b = *cp++; \ +val += (b<<20); if ( b >= 128 ) { b = *cp++; \ +val += (b<<27); } } } } \ +} while(0) + +#define LZB_PutExcessLRL(cp,val) LZB_PutExcessBW(cp,val) +#define LZB_PutExcessML(cp,val) LZB_PutExcessBW(cp,val) + +#define LZB_AddExcessLRL(cp,val) LZB_AddExcessBW(cp,val) +#define LZB_AddExcessML(cp,val) LZB_AddExcessBW(cp,val) + +//============================================================================= +// match copies : + +// used for LRL : +static OOINLINE void copy_no_overlap_long(U8 * to, const U8 * from, SINTr length) +{ + for(int i=0;i= LZB_MML && ml < LZB_MATCHLEN_ESCAPE ); + + // overlap + // @@ err not awesome + to[0] = from[0]; + to[1] = from[1]; + to[2] = from[2]; + to[3] = from[3]; + to[4] = from[4]; + to[5] = from[5]; + to[6] = from[6]; + to[7] = from[7]; + if ( ml > 8 ) + { + to += 8; from += 8; ml -= 8; + // max of 10 more + while(ml--) + { + *to++ = *from++; + } + } +} + +static OOINLINE void copy_match_memset(U8 * to, int c, SINTr ml) +{ + RR_ASSERT( ml >= 4 ); + U32 four = c * 0x01010101; + U8 * end = to + ml; + write32(to, four); to += 4; + while(to>4); + + // copy 4 literals speculatively : + write32( rp , read32(cp) ); + + //RR_ASSERT( lrl >= 8 || ml_control >= 8 ); + + if ( lrl > 4 ) + { + // if lrl was <= 8 we did it, else need this : + if_unlikely ( lrl > 8 ) + { + if_unlikely ( lrl >= LZB_LRL_ESCAPE ) + { + LZB_AddExcessLRL( cp, lrl ); + + // hide the EOF check here ? + // has to be after the GetExcess + if_unlikely ( rp+lrl >= rpEnd ) + { + RR_ASSERT( rp+lrl == rpEnd ); + + copy_no_overlap_nooverrun(rp,cp,lrl); + + rp += lrl; + cp += lrl; + break; + } + else + { + // total undo of the previous copy + copy_no_overlap_long(rp,cp,lrl); + } + } + else // > 8 but not 0xF + { + // hide the EOF check here ? + if_unlikely ( rp+lrl >= rpEnd ) + { + if ( lrl == 9 ) + { + // may be a false 9 + lrl = rrPtrDiff32( rpEnd - rp ); + } + RR_ASSERT( rp+lrl == rpEnd ); + + copy_no_overlap_nooverrun(rp,cp,lrl); + + rp += lrl; + cp += lrl; + break; + } + else + { + write32( rp+4 , read32(cp+4) ); + // put 8 more : + write64( (rp+8) , read64((cp+8)) ); + } + } + } + else + { + write32( rp+4 , read32(cp+4) ); + } + } + + rp += lrl; + cp += lrl; + + RR_ASSERT( rp+LZB_MML <= rpEnd ); + + UINTr ml = ml_control + LZB_MML; + + // speculatively grab offset but don't advance cp yet + UINTr off = RR_GET16_LE_UNALIGNED(cp); + + if ( ml_control <= 8 ) + { + cp += 2; // consume offset + const U8 * match = rp - off; + + RR_ASSERT( ml <= 12 ); + + write64( rp , read64(match) ); + write32( rp+8 , read32(match+8) ); + + rp += ml; + continue; + } + else + { + + if_likely( ml_control < LZB_MLCONTROL_ESCAPE ) // short match + { + cp += 2; // consume offset + const U8 * match = rp - off; + + RR_ASSERT( off >= 8 || ml <= off ); + + write64( rp , read64(match) ); + write64( rp+8 , read64(match+8) ); + + if ( ml > 16 ) + { + write16( rp+16, read16(match+16) ); + } + } + else + { + // get 1-byte excess code + UINTr excesslow = off&127; + cp++; // consume 1 + + //if ( excess1 >= 128 ) + if ( off & 128 ) + { + ml_control = excesslow >> 3; + ml = ml_control + LZB_MML; + if ( ml_control == 0xF ) + { + // get more ml + LZB_AddExcessML( cp, ml ); + } + + UINTr myoff = off & 7; + + // low offset, can't do 8-byte grabs + if ( myoff == 1 ) + { + int c = rp[-1]; + copy_match_memset(rp,c,ml); + } + else + { + // shit but whatever, very rare + for(UINTr i=0;i>13); + return h; +} + +#define HashMatchFinder_Hash32 hmf_hash4_32 + +//================================================================================= + +#define LZB_Hash4 hmf_hash4_32 + +static RADINLINE U32 LZB_SecondHash4(U32 be4) +{ + const U32 m = 0x5bd1e995; + + U32 h = be4 * m; + h += (h>>11); + + return h; +} + +//============================================= + +static int RADFORCEINLINE GetNumBytesZeroNeverAllR(UINTr x) +{ + RR_ASSERT( x != 0 ); + +#if defined(__RADBIGENDIAN__) + // big endian, so earlier bytes are at the top + int nb = (int)rrClzBytesR(x); +#elif defined(__RADLITTLEENDIAN__) + // little endian, so earlier bytes are at the bottom + int nb = (int)rrCtzBytesR(x); +#else +#error wtf no endian set +#endif + + RR_ASSERT( nb >= 0 && nb < (int)sizeof(UINTr) ); + return nb; +} + +//=============================== + +static RADFORCEINLINE U8 * LZB_Output(U8 * cp, S32 lrl, const U8 * literals, S32 matchlen , S32 mo ) +{ + RR_ASSERT( lrl >= 0 ); + RR_ASSERT( matchlen >= LZB_MML ); + RR_ASSERT( mo > 0 && mo <= LZB_MAX_OFFSET ); + + //rrprintf("[%3d][%3d][%7d]\n",lrl,ml,mo); + + S32 sendml = matchlen - LZB_MML; + + U32 ml_in_control = RR_MIN(sendml,LZB_MLCONTROL_ESCAPE); + + if ( mo >= 8 ) // no overlap + { + if ( lrl < LZB_LRL_ESCAPE ) + { + U32 control = lrl | (ml_in_control<<4); + + *cp++ = (U8) control; + + write64(cp, read64(literals)); + if ( lrl > 8 ) + { + write64(cp+8, read64(literals+8)); + } + cp += lrl; + } + else + { + U32 control = LZB_LRL_ESCAPE | (ml_in_control<<4); + + *cp++ = (U8) control; + + U32 lrl_excess = lrl - LZB_LRL_ESCAPE; + LZB_PutExcessLRL(cp,lrl_excess); + + // @@ ? is this okay for overrun ? + lz_copysteptoend_overrunok(cp,literals,lrl); + } + + if ( ml_in_control < LZB_MLCONTROL_ESCAPE ) + { + RR_ASSERT( (U16)(mo) == mo ); + RR_PUT16_LE_UNALIGNED(cp,(U16)(mo)); + cp += 2; + } + else + { + U32 ml_excess = sendml - LZB_MLCONTROL_ESCAPE; + + // put special first byte, then offset, then remainder + if ( ml_excess < 127 ) + { + *cp++ = (U8)ml_excess; + + RR_ASSERT( (U16)(mo) == mo ); + RR_PUT16_LE_UNALIGNED(cp,(U16)(mo)); + cp += 2; + } + else + { + *cp++ = (U8)127; + + RR_ASSERT( (U16)(mo) == mo ); + RR_PUT16_LE_UNALIGNED(cp,(U16)(mo)); + cp += 2; + + ml_excess -= 127; + LZB_PutExcessML(cp,ml_excess); + } + } + } + else + { + U32 lrl_in_control = RR_MIN(lrl,LZB_LRL_ESCAPE); + + // overlap case + U32 control = (lrl_in_control) | (LZB_MLCONTROL_ESCAPE<<4); + + *cp++ = (U8) control; + + if ( lrl_in_control == LZB_LRL_ESCAPE ) + { + U32 lrl_excess = lrl - LZB_LRL_ESCAPE; + LZB_PutExcessLRL(cp,lrl_excess); + } + + lz_copysteptoend_overrunok(cp,literals,lrl); + //cp += lrl; + + // special excess1 : + UINTr excess1 = 128 + (ml_in_control<<3) + mo; + RR_ASSERT( excess1 < 256 ); + + *cp++ = (U8)excess1; + + if ( ml_in_control == LZB_MLCONTROL_ESCAPE ) + { + U32 ml_excess = sendml - LZB_MLCONTROL_ESCAPE; + LZB_PutExcessML(cp,ml_excess); + } + } + + return cp; +} + +#if LZB_FORCELASTLRL9 + +static RADINLINE U8 * LZB_OutputLast(U8 * cp, S32 lrl, const U8 * literals ) +{ + RR_ASSERT( lrl >= 0 ); + + //U32 ml = 0; + //U32 mo = 0; + + U32 lrl_in_control = RR_MIN(lrl,LZB_LRL_ESCAPE); + +#if LZB_END_WITH_LITERALS + // lrl_in_control must be at least 9 + lrl_in_control = RR_MAX(lrl_in_control,9); +#endif + + U32 control = lrl_in_control; + + *cp++ = (U8) control; + + if ( lrl_in_control == LZB_LRL_ESCAPE ) + { + U32 lrl_excess = lrl - LZB_LRL_ESCAPE; + LZB_PutExcessLRL(cp,lrl_excess); + } + + memmove(cp,literals,lrl); + cp += lrl; + + return cp; +} + +#else + +static RADINLINE U8 * LZB_OutputLast(U8 * cp, S32 lrl, const U8 * literals ) +{ + cp = LZB_Output(cp,lrl,literals,LZB_MML,1); + + // remove the offset we put : + cp -= 2; + + return cp; +} + +#endif + +//=============================================================== + +static void rr_lzb_simple_context_init(rr_lzb_simple_context * ctx) //, const void * base) +{ + RR_ASSERT( ctx->m_tableSizeBits >= 12 && ctx->m_tableSizeBits <= 24 ); + memset(ctx->m_hashTable,0,sizeof(U16)*((SINTa)1<m_tableSizeBits)); +} + +//=============================================================== + +/* +#define FAST_HASH_DEPTH_SHIFT (1) // more depth = more & more compression, +#define DO_FAST_2ND_HASH // rate= 30.69 mb/s , 15451369 <- turning this off is the best way to get more speed and less compression +/*/ +#define FAST_HASH_DEPTH_SHIFT (0) +#define DO_FAST_2ND_HASH +/**/ + +// lzt99, 24700820, 15475520, 16677179 +//encode only : 0.880 seconds, 1.62 b/hc, rate= 28.08 mb/s + +//#define FAST_HASH_DEPTH_SHIFT (1) // more depth = more & more compression, but slower + +#define DO_FAST_UPDATE_MATCH_HASHES 1 // helps compression a lot , like 0.30 +//#define DO_FAST_UPDATE_MATCH_HASHES 2 // helps compression a lot , like 0.30 +#define DO_FAST_LAZY_MATCH // also helps a lot , like 0.15 +#define DO_FAST_HASH_DWORD 1 + +#define FAST_MULTISTEP_LITERALS_SHIFT (5) + + +//----------------------- +// derived : + +/* +#define FAST_HASH_BITS (FAST_HASH_TOTAL_BITS-FAST_HASH_DEPTH_SHIFT) +#define FAST_HASH_SIZE (1< 1 +#define FAST_HASH_INDEX(h,d) ( ((h)< 1 + int hashCycle = 0; +#endif + + U16 * hashTable16 = fh->m_hashTable; + + int hashTableSizeBits = fh->m_tableSizeBits; + U32 hash_table_mask = (U32)((1UL<<(hashTableSizeBits - FAST_HASH_DEPTH_SHIFT)) - 1); + + const U8 * zeroPosPtr = (const U8 *)raw; + + // first byte is always a literal + rp++; + + for(;;) + { + S32 matchOff; + + UINTr failedMatches = (1<= 0 ); + +#ifdef DO_FAST_2ND_HASH + hash2 = ( LZB_SecondHash4(rp32) ) & hash_table_mask; +#endif + +#if FAST_HASH_DEPTH > 1 + for(int d=0;d= 0 ); + + hashrp = rp - matchOff; + + //if ( matchOff <= LZB_MAX_OFFSET ) + RR_ASSERT( matchOff <= LZB_MAX_OFFSET ); + { + const U32 hashrp32 = read32(hashrp); + + if ( rp32 == hashrp32 && matchOff != 0 ) + { + goto found_match; + } + } + } + +#ifdef DO_FAST_2ND_HASH + +#if FAST_HASH_DEPTH > 1 + for(int d=0;d= 0 ); + + hashrp = rp - matchOff; + + RR_ASSERT( matchOff <= LZB_MAX_OFFSET ); + { + const U32 hashrp32 = read32(hashrp); + + if ( rp32 == hashrp32 && matchOff != 0 ) + { + goto found_match; + } + } + } + +#endif + + //--------------------------- + // update hash : + + hashTable16[ FAST_HASH_INDEX(hash,hashCycle) ] = (U16) curpos; + +#ifdef DO_FAST_2ND_HASH + // do NOT step hashCycle ! + //hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; + hashTable16[ FAST_HASH_INDEX(hash2,hashCycle) ] = (U16) curpos; +#endif + +#if FAST_HASH_DEPTH > 1 + hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; +#endif + + UINTr stepLiterals = (failedMatches>>FAST_MULTISTEP_LITERALS_SHIFT); + RR_ASSERT( stepLiterals >= 1 ); + + ++failedMatches; + + rp += stepLiterals; + + if ( rp >= rpEndSafe ) + goto done; + + rp32 = read32(rp); + hash = FAST_HASH_FUNC(rp, rp32 ); + + } + + //------------------------------- + found_match: + + // found something + + //------------------------- + // update hash now so lazy can see it : + +#if 1 // pretty important to compression + hashTable16[ FAST_HASH_INDEX(hash,hashCycle) ] = (U16) curpos; + +#ifdef DO_FAST_2ND_HASH + // do NOT step hashCycle ! + //hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; + hashTable16[ FAST_HASH_INDEX(hash2,hashCycle) ] = (U16) curpos; +#endif + +#if FAST_HASH_DEPTH > 1 + hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; +#endif +#endif + + //----------------------------------- + + const U8 * match_start = rp; + rp += 4; + + while( rp < rpEndSafe ) + { + UINTr big1 = readR(rp); + UINTr big2 = readR(rp-matchOff); + + if ( big1 == big2 ) + { + rp += RAD_PTRBYTES; + continue; + } + else + { + rp += GetNumBytesZeroNeverAllR(big1^big2); + break; + } + } + rp = RR_MIN(rp,rpMatchEnd); + + //------------------------------- + // rp is now at the *end* of the match + + //------------------------------- + + // check lazy match too +#ifdef DO_FAST_LAZY_MATCH + if (rp< rpEndSafe) + { + const U8 * lazyrp = match_start + 1; + //SINTa lazypos = rrPtrDiff(lazyrp - zeroPosPtr); + SINTa lazypos = curpos + 1; + RR_ASSERT( lazypos == rrPtrDiff(lazyrp - zeroPosPtr) ); + + U32 lazyrp32 = read32(lazyrp); + + const U8 * lazyhashrp; + SINTa lazymatchOff; + + U32 lazyHash = FAST_HASH_FUNC(lazyrp, lazyrp32 ); + +#ifdef DO_FAST_2ND_HASH + U32 lazyhash2 = LZB_SecondHash4(lazyrp32) & hash_table_mask; +#endif + +#if FAST_HASH_DEPTH > 1 + for(int d=0;d= 0 ); + + RR_ASSERT( lazymatchOff <= LZB_MAX_OFFSET ); + { + lazyhashrp = lazyrp - lazymatchOff; + + const U32 hashrp32 = read32(lazyhashrp); + + if ( lazyrp32 == hashrp32 && lazymatchOff != 0 ) + { + goto lazy_found_match; + } + } + } + +#ifdef DO_FAST_2ND_HASH +#if FAST_HASH_DEPTH > 1 + for(int d=0;d= 0 ); + + RR_ASSERT( lazymatchOff <= LZB_MAX_OFFSET ); + { + lazyhashrp = lazyrp - lazymatchOff; + + const U32 hashrp32 = read32(lazyhashrp); + + if ( lazyrp32 == hashrp32 && lazymatchOff != 0 ) + { + goto lazy_found_match; + } + } + } +#endif + + if ( 0 ) + { + lazy_found_match: + + lazyrp += 4; + + while( lazyrp < rpEndSafe ) + { + UINTr big1 = readR(lazyrp); + UINTr big2 = readR(lazyrp-lazymatchOff); + + if ( big1 == big2 ) + { + lazyrp += RAD_PTRBYTES; + continue; + } + else + { + lazyrp += GetNumBytesZeroNeverAllR(big1^big2); + break; + } + } + lazyrp = RR_MIN(lazyrp,rpMatchEnd); + + //S32 lazymatchLen = rrPtrDiff32( lazyrp - (match_start+1) ); + //RR_ASSERT( lazymatchLen >= 4 ); + + if ( lazyrp >= rp+3 ) + { + // yes take the lazy match + + // put a literal : + match_start++; + + // I had a bug where lazypos was set wrong for the hash fill + // it set it to the *end* of the normal match + // and for some reason that helped compression WTF WTF + //SINTa lazypos = rrPtrDiff(rp - zeroPosPtr); // 233647528 + // with correct lazypos : 233651228 + + // really this shouldn't be necessary at all + // because I do an update of hash at all positions in the match including first! +#if 1 // with update disabled - 233690274 + + hashTable16[ FAST_HASH_INDEX(lazyHash,hashCycle) ] = (U16) lazypos; + +#ifdef DO_FAST_2ND_HASH + // do NOT step hashCycle ! + hashTable16[ FAST_HASH_INDEX(lazyhash2,hashCycle) ] = (U16) lazypos; +#endif + +#if FAST_HASH_DEPTH > 1 + hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; +#endif + +#endif + + // and then drop out and do the lazy match : + //matchLen = lazymatchLen; + matchOff = (S32)lazymatchOff; + rp = lazyrp; + hashrp = lazyhashrp; + } + } + } +#endif + + //--------------------------------------------------- + + // back up start of match that we missed due to stepLiterals ! + // make sure we don't read off the start of the array + + // this costs a little speed and gains a little compression + // 15662162 at 121.58 mb/s + // 15776473 at 127.92 mb/s +#if 1 + /* + lzbf : 24,700,820 ->15,963,503 = 5.170 bpb = 1.547 to 1 + encode : 0.171 seconds, 83.60 b/kc, rate= 144.54 M/s + decode : 0.014 seconds, 1002.64 b/kc, rate= 1733.57 M/s + */ + { + // 144 M/s + // back up start of match that we missed + // make sure we don't read off the start of the array + + const U8 * rpm1 = match_start-1; + if ( rpm1 >= literals_start && hashrp > zeroPosPtr && rpm1[0] == hashrp[-1] ) + { + rpm1--; hashrp-= 2; + + while ( rpm1 >= literals_start && hashrp >= zeroPosPtr && rpm1[0] == *hashrp ) + { + rpm1--; + hashrp--; + } + + match_start = rpm1+1; + //rp = RR_MAX(rp,literals_start); + RR_ASSERT( match_start >= literals_start ); + } + } +#endif + + S32 matchLen = rrPtrDiff32( rp - match_start ); + RR_ASSERT( matchLen >= 4 ); + + //=============================================== + // chose a match + // output LRL (if any) and match + + S32 cur_lrl = rrPtrDiff32(match_start - literals_start); + + // catch expansion while writing : + if_unlikely ( cp+cur_lrl >= compExpandedPtr ) + { + return rawLen+1; + } + + cp = LZB_Output(cp,cur_lrl,literals_start,matchLen,matchOff); + + // skip the match : + literals_start = rp; + + if ( rp >= rpEndSafe ) + break; + + // step & update hashes : + // (I already did cur pos) +#ifdef DO_FAST_UPDATE_MATCH_HASHES + // don't bother if it takes us to the end : + // (this check is not for speed it's to avoid the access violation) + const U8 * ptr = match_start+1; + U16 pos16 = (U16) rrPtrDiff( ptr - zeroPosPtr ); + for(;ptr 0 ); +#endif + + if ( cur_lrl > 0 ) + { + // catch expansion while writing : + if ( cp+cur_lrl >= compExpandedPtr ) + { + return rawLen+1; + } + + cp = LZB_OutputLast(cp,cur_lrl,literals_start); + } + + SINTa compLen = rrPtrDiff( cp - (U8 *)comp ); + + return compLen; +} + +SINTa rr_lzb_simple_encode_fast(rr_lzb_simple_context * fh, + const void * raw, SINTa rawLen, void * comp) +{ + rr_lzb_simple_context_init(fh); //,raw); + + SINTa comp_len = rr_lzb_simple_encode_fast_sub(fh,raw,rawLen,comp); + if ( comp_len >= rawLen ) + { + memcpy(comp,raw,rawLen); + return rawLen; + } + return comp_len; +} + +#undef FAST_HASH_DEPTH_SHIFT + +#undef DO_FAST_UPDATE_MATCH_HASHES +#undef DO_FAST_LAZY_MATCH +#undef DO_FAST_2ND_HASH + +//===================================================== + +#define FAST_HASH_DEPTH_SHIFT (0) + +#undef FAST_MULTISTEP_LITERALS_SHIFT +#define FAST_MULTISTEP_LITERALS_SHIFT (4) + + + +//----------------------- +// derived : + +RR_COMPILER_ASSERT( FAST_HASH_DEPTH_SHIFT == 0 ); + +#undef FAST_HASH_FUNC +//#define FAST_HASH_FUNC(ptr,dword) ( LZB_Hash4(dword) & hash_table_mask ) +#define FAST_HASH_FUNC(ptr,dword) ( (((dword)*2654435761U)>>16) & hash_table_mask ) + + +// @@@@ ???? +#define LZBVF_DO_BACKUP 0 +//#define LZBVF_DO_BACKUP 1 + + +static SINTa rr_lzb_simple_encode_veryfast_sub(rr_lzb_simple_context * fh, + const void * raw, SINTa rawLen, void * comp) +{ + //SIMPLEPROFILE_SCOPE_N(lzbfast_sub,rawLen); + //THREADPROFILEFUNC(); + + U8 * cp = (U8 *)comp; + U8 * compExpandedPtr = cp + rawLen - 8; + + const U8 * rp = (const U8 *)raw; + const U8 * rpEnd = rp+rawLen; + + // we can match up to rpEnd + // but matches can't start past rpEndSafe + const U8 * rpMatchEnd = rpEnd - LZB_END_OF_BLOCK_NO_MATCH_ZONE; + + const U8 * rpEndSafe = rpMatchEnd - LZB_MML; + + if ( rpEndSafe <= raw ) + { + // can't compress + return rawLen+1; + } + + const U8 * literals_start = rp; + + U16 * hashTable16 = fh->m_hashTable; + int hashTableSizeBits = fh->m_tableSizeBits; + U32 hash_table_mask = (U32)((1UL<<(hashTableSizeBits)) - 1); + + const U8 * zeroPosPtr = (const U8 *)raw; + + // first byte is always a literal + rp++; + + for(;;) + { + U32 rp32 = read32(rp); + U32 hash = FAST_HASH_FUNC(rp, rp32 ); + const U8 * hashrp; + S32 matchOff; + UINTr failedMatches; + + // loop while no match found : + + // first loop with step = 1 + // @@ + //int step1count = (1<= 0 ); + + U16 hashpos16 = hashTable16[hash]; + hashTable16[ hash ] = (U16) curpos; + + matchOff = (U16)(curpos - hashpos16); + RR_ASSERT( matchOff >= 0 && matchOff <= LZB_MAX_OFFSET ); + hashrp = rp - matchOff; + + const U32 hashrp32 = read32(hashrp); + if ( rp32 == hashrp32 && matchOff != 0 ) + { + goto found_match; + } + + if ( ++rp >= rpEndSafe ) + goto done; + + rp32 = read32(rp); + hash = FAST_HASH_FUNC(rp, rp32 ); + } + + // step starts at 2 : + failedMatches = (2<= 0 ); + + U16 hashpos16 = hashTable16[hash]; + hashTable16[ hash ] = (U16) curpos; + + matchOff = (U16)(curpos - hashpos16); + RR_ASSERT( matchOff >= 0 && matchOff <= LZB_MAX_OFFSET ); + hashrp = rp - matchOff; + + const U32 hashrp32 = read32(hashrp); + + if ( rp32 == hashrp32 && matchOff != 0 ) + { + goto found_match; + } + + UINTr stepLiterals = (failedMatches>>FAST_MULTISTEP_LITERALS_SHIFT); + RR_ASSERT( stepLiterals >= 1 ); + + ++failedMatches; + + rp += stepLiterals; + + if ( rp >= rpEndSafe ) + goto done; + + rp32 = read32(rp); + hash = FAST_HASH_FUNC(rp, rp32 ); + } + + //------------------------------- + found_match: + + // found something + +#if LZBVF_DO_BACKUP + + // alternative backup using counter : + S32 cur_lrl = rrPtrDiff32(rp - literals_start); + int neg_max_backup = - RR_MIN(cur_lrl , rrPtrDiff32(hashrp - zeroPosPtr) ); + int neg_backup = -1; + if( neg_backup >= neg_max_backup && rp[neg_backup] == hashrp[neg_backup] ) + { + neg_backup--; + while( neg_backup >= neg_max_backup && rp[neg_backup] == hashrp[neg_backup] ) + { + neg_backup--; + } + neg_backup++; + rp += neg_backup; + cur_lrl += neg_backup; + RR_ASSERT( cur_lrl >= 0 ); + RR_ASSERT( cur_lrl == rrPtrDiff32(rp - literals_start) ); + } + +#else + + S32 cur_lrl = rrPtrDiff32(rp - literals_start); + +#endif + + // catch expansion while writing : + if_unlikely ( cp+cur_lrl >= compExpandedPtr ) + { + return rawLen+1; + } + + RR_ASSERT( matchOff >= 1 ); + + //--------------------------------------- + // find rest of match len + // save pointer to start of match + // walk rp ahead to end of match + const U8 * match_start = rp; + rp += 4; + + while( rp < rpEndSafe ) + { + UINTr big1 = readR(rp); + UINTr big2 = readR(rp-matchOff); + + if ( big1 == big2 ) + { + rp += RAD_PTRBYTES; + continue; + } + else + { + rp += GetNumBytesZeroNeverAllR(big1^big2); + break; + } + } + rp = RR_MIN(rp,rpMatchEnd); + S32 matchLen = rrPtrDiff32( rp - match_start ); + + //=============================================== + // chose a match + // output LRL (if any) and match + + cp = LZB_Output(cp,cur_lrl,literals_start,matchLen,matchOff); + + // skip the match : + literals_start = rp; + + if ( rp >= rpEndSafe ) + goto done; + } + + done: + + int cur_lrl = rrPtrDiff32(rpEnd - literals_start); +#if LZB_END_WITH_LITERALS + RR_ASSERT_ALWAYS(cur_lrl > 0 ); +#endif + + if ( cur_lrl > 0 ) + { + // catch expansion while writing : + if ( cp+cur_lrl >= compExpandedPtr ) + { + return rawLen+1; + } + + cp = LZB_OutputLast(cp,cur_lrl,literals_start); + } + + SINTa compLen = rrPtrDiff( cp - (U8 *)comp ); + + return compLen; +} + +SINTa rr_lzb_simple_encode_veryfast(rr_lzb_simple_context * fh, + const void * raw, SINTa rawLen, void * comp) +{ + rr_lzb_simple_context_init(fh); //,raw); + + SINTa comp_len = rr_lzb_simple_encode_veryfast_sub(fh,raw,rawLen,comp); + if ( comp_len >= rawLen ) + { + memcpy(comp,raw,rawLen); + return rawLen; + } + return comp_len; +} + +#undef FAST_HASH_DEPTH_SHIFT + +#undef DO_FAST_UPDATE_MATCH_HASHES +#undef DO_FAST_LAZY_MATCH +#undef DO_FAST_2ND_HASH + +//===================================================== +// vim:noet:sw=4:ts=4 diff --git a/src/third_party/rad_lzb_simple/rad_lzb_simple.h b/src/third_party/rad_lzb_simple/rad_lzb_simple.h new file mode 100644 index 00000000..c1e5e96e --- /dev/null +++ b/src/third_party/rad_lzb_simple/rad_lzb_simple.h @@ -0,0 +1,141 @@ +#ifndef _RAD_LZB_SIMPLE_H_ +#define _RAD_LZB_SIMPLE_H_ + +/*====================================================== + +To encode : + + Set up an rr_lzb_simple_context + + fill out m_tableSizeBits (14-16 is typical) + + allocate m_hashTable + + rr_lzb_simple_context c; + c.m_tableSizeBits = 14; + c.m_hashTable = OODLE_MALLOC_ARRAY(U16,RR_ONE_SA< +typedef uint8_t U8; +typedef uint16_t U16; +typedef uint32_t U32; +typedef uint64_t U64; +typedef int8_t S8; +typedef int16_t S16; +typedef int32_t S32; +typedef int64_t S64; + +typedef S64 SINTa; +typedef U64 RAD_U64; +typedef S64 RAD_S64; +typedef U32 RAD_U32; +typedef S32 RAD_S32; + +#define RADINLINE __inline + +#if defined(_MSC_VER) +# define RADFORCEINLINE __forceinline +#elif defined(__clang__) +# define RADFORCEINLINE __attribute__((always_inline)) +#else +# error need force inline for this compiler +#endif + +#define RR_STRING_JOIN(arg1, arg2) RR_STRING_JOIN_DELAY(arg1, arg2) +#define RR_STRING_JOIN_DELAY(arg1, arg2) RR_STRING_JOIN_IMMEDIATE(arg1, arg2) +#define RR_STRING_JOIN_IMMEDIATE(arg1, arg2) arg1 ## arg2 + +#ifdef _MSC_VER +#define RR_NUMBERNAME(name) RR_STRING_JOIN(name,__COUNTER__) +#else +#define RR_NUMBERNAME(name) RR_STRING_JOIN(name,__LINE__) +#endif + +#define RR_COMPILER_ASSERT(exp) typedef char RR_NUMBERNAME(_dummy_array) [ (exp) ? 1 : -1 ] + +#if defined(__clang__) +# define Expect(expr, val) __builtin_expect((expr), (val)) +#else +# define Expect(expr, val) (expr) +#endif + +#define RAD_LIKELY(expr) Expect(expr,1) +#define RAD_UNLIKELY(expr) Expect(expr,0) + +#define __RADLITTLEENDIAN__ 1 +#define RAD_PTRBYTES 8 +#define RR_MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define RR_MAX(a,b) ( (a) > (b) ? (a) : (b) ) +#define RR_ASSERT_ALWAYS(c) do{if(!(c)) {__debugbreak();}}while(0) +#define RR_ASSERT(c) RR_ASSERT_ALWAYS(c) + +#define RR_PUT16_LE(ptr,val) *((U16 *)(ptr)) = (U16)(val) +#define RR_GET16_LE_UNALIGNED(ptr) *((const U16 *)(ptr)) + +static RADINLINE U32 +rrCtzBytes32(U32 val) +{ + // Don't get fancy here. Assumes val != 0. + if (val & 0x000000ffu) return 0; + if (val & 0x0000ff00u) return 1; + if (val & 0x00ff0000u) return 2; + return 3; +} + +static RADINLINE U32 +rrCtzBytes64(U64 val) +{ + U32 lo = (U32) val; + return lo ? rrCtzBytes32(lo) : 4 + rrCtzBytes32((U32) (val >> 32)); +} + +//~ + +//--------------------- + +typedef struct rr_lzb_simple_context rr_lzb_simple_context; +struct rr_lzb_simple_context +{ + U16 * m_hashTable; // must be allocated to sizeof(U16)*(1<