// Copyright (c) Epic Games Tools // Licensed under the MIT license (https://opensource.org/license/mit/) //////////////////////////////// //~ Compression/Decompression Forward-Declares #ifndef _RAD_LZB_SIMPLE_H_ #define _RAD_LZB_SIMPLE_H_ /*====================================================== To encode : Set up an rr_lzb_simple_context fill out m_tableSizeBits (14-16 is typical) allocate m_hashTable rr_lzb_simple_context c; c.m_tableSizeBits = 14; c.m_hashTable = OODLE_MALLOC_ARRAY(U16,RR_ONE_SA< typedef uint8_t U8; typedef uint16_t U16; typedef uint32_t U32; typedef uint64_t U64; typedef int8_t S8; typedef int16_t S16; typedef int32_t S32; typedef int64_t S64; typedef S64 SINTa; typedef U64 RAD_U64; typedef S64 RAD_S64; typedef U32 RAD_U32; typedef S32 RAD_S32; #define RADINLINE __inline #if defined(_MSC_VER) # define RADFORCEINLINE __forceinline #elif defined(__clang__) || defined(__GNUC__) # define RADFORCEINLINE __attribute__((always_inline)) #else # error need force inline for this compiler #endif #if _MSC_VER # define RADLZB_TRAP() __debugbreak() #elif __clang__ || __GNUC__ # define RADLZB_TRAP() __builtin_trap() #else # error Unknown trap intrinsic for this compiler. #endif #define RR_STRING_JOIN(arg1, arg2) RR_STRING_JOIN_DELAY(arg1, arg2) #define RR_STRING_JOIN_DELAY(arg1, arg2) RR_STRING_JOIN_IMMEDIATE(arg1, arg2) #define RR_STRING_JOIN_IMMEDIATE(arg1, arg2) arg1 ## arg2 #ifdef _MSC_VER #define RR_NUMBERNAME(name) RR_STRING_JOIN(name,__COUNTER__) #else #define RR_NUMBERNAME(name) RR_STRING_JOIN(name,__LINE__) #endif #define RR_COMPILER_ASSERT(exp) typedef char RR_NUMBERNAME(_dummy_array) [ (exp) ? 1 : -1 ] #if defined(__clang__) # define Expect(expr, val) __builtin_expect((expr), (val)) #else # define Expect(expr, val) (expr) #endif #define RAD_LIKELY(expr) Expect(expr,1) #define RAD_UNLIKELY(expr) Expect(expr,0) #define __RADLITTLEENDIAN__ 1 #define RAD_PTRBYTES 8 #define RR_MIN(a,b) ( (a) < (b) ? (a) : (b) ) #define RR_MAX(a,b) ( (a) > (b) ? (a) : (b) ) #define RR_ASSERT_ALWAYS(c) do{if(!(c)) {RADLZB_TRAP();}}while(0) #define RR_ASSERT(c) RR_ASSERT_ALWAYS(c) #define RR_PUT16_LE(ptr,val) *((U16 *)(ptr)) = (U16)(val) #define RR_GET16_LE_UNALIGNED(ptr) *((const U16 *)(ptr)) static RADINLINE U32 rrCtzBytes32(U32 val) { // Don't get fancy here. Assumes val != 0. if (val & 0x000000ffu) return 0; if (val & 0x0000ff00u) return 1; if (val & 0x00ff0000u) return 2; return 3; } static RADINLINE U32 rrCtzBytes64(U64 val) { U32 lo = (U32) val; return lo ? rrCtzBytes32(lo) : 4 + rrCtzBytes32((U32) (val >> 32)); } //~ //--------------------- typedef struct rr_lzb_simple_context rr_lzb_simple_context; struct rr_lzb_simple_context { U16 * m_hashTable; // must be allocated to sizeof(U16)*(1<magic != RDI_MAGIC_CONSTANT) { hdr = 0; result = RDI_ParseStatus_HeaderDoesNotMatch; } if(hdr != 0 && hdr->encoding_version != RDI_ENCODING_VERSION) { hdr = 0; result = RDI_ParseStatus_UnsupportedVersionNumber; } } ////////////////////////////// //- rjf: extract data sections // RDI_Section *dsecs = 0; RDI_U32 dsec_count = 0; if(result == RDI_ParseStatus_Good) { RDI_U64 opl = (RDI_U64)hdr->data_section_off + (RDI_U64)hdr->data_section_count*sizeof(*dsecs); if(opl <= size) { dsecs = (RDI_Section*)(data + hdr->data_section_off); dsec_count = hdr->data_section_count; } if(dsecs == 0) { result = RDI_ParseStatus_InvalidDataSecionLayout; } } ////////////////////////////// //- rjf: fill result // if(result == RDI_ParseStatus_Good) { out->raw_data = data; out->raw_data_size = size; out->sections = dsecs; out->sections_count = dsec_count; } return result; } //////////////////////////////// //~ Base Parsed Info Extraction Helpers //- section table/element raw data extraction RDI_PROC void * rdi_section_raw_data_from_kind(RDI_Parsed *rdi, RDI_SectionKind kind, RDI_SectionEncoding *encoding_out, RDI_U64 *size_out) { void *result = 0; #if !defined(RDI_DISABLE_NILS) result = &rdi_nil_element_union; *size_out = rdi_section_element_size_table[kind]; #endif if(0 <= kind && kind < rdi->sections_count && rdi->sections[kind].off < rdi->raw_data_size) { result = rdi->raw_data+rdi->sections[kind].off; *size_out = rdi->sections[kind].encoded_size; *encoding_out = rdi->sections[kind].encoding; } return result; } RDI_PROC void * rdi_section_raw_table_from_kind(RDI_Parsed *rdi, RDI_SectionKind kind, RDI_U64 *count_out) { void *result = 0; RDI_U64 all_elements_size = 0; RDI_SectionEncoding all_elements_encoding = 0; void *all_elements = rdi_section_raw_data_from_kind(rdi, kind, &all_elements_encoding, &all_elements_size); if(all_elements_encoding == RDI_SectionEncoding_Unpacked) { RDI_U64 element_size = (RDI_U64)rdi_section_element_size_table[kind]; RDI_U64 all_elements_count = all_elements_size/element_size; result = all_elements; *count_out = all_elements_count; } return result; } RDI_PROC void * rdi_section_raw_element_from_kind_idx(RDI_Parsed *rdi, RDI_SectionKind kind, RDI_U64 idx) { RDI_U64 count = 0; void *table = rdi_section_raw_table_from_kind(rdi, kind, &count); void *result = table; if(idx < count) { RDI_U64 element_size = (RDI_U64)rdi_section_element_size_table[kind]; result = (RDI_U8 *)table + element_size*idx; } return result; } //- info about whole parse RDI_PROC RDI_U64 rdi_decompressed_size_from_parsed(RDI_Parsed *rdi) { RDI_U64 decompressed_size = rdi->raw_data_size; for(RDI_U64 section_idx = 0; section_idx < rdi->sections_count; section_idx += 1) { decompressed_size += (rdi->sections[section_idx].unpacked_size - rdi->sections[section_idx].encoded_size); } return decompressed_size; } //- decompression internal void rdi_decompress_parsed(U8 *decompressed_data, U64 decompressed_size, RDI_Parsed *og_rdi) { // rjf: copy header RDI_Header *src_header = (RDI_Header *)og_rdi->raw_data; RDI_Header *dst_header = (RDI_Header *)decompressed_data; { MemoryCopy(dst_header, src_header, sizeof(RDI_Header)); } // rjf: copy & adjust sections for decompressed version if(og_rdi->sections_count != 0) { RDI_Section *dsec_base = (RDI_Section *)(decompressed_data + dst_header->data_section_off); MemoryCopy(dsec_base, (U8 *)og_rdi->raw_data + src_header->data_section_off, sizeof(RDI_Section) * og_rdi->sections_count); U64 off = dst_header->data_section_off + sizeof(RDI_Section) * og_rdi->sections_count; off += 7; off -= off%8; for(U64 idx = 0; idx < og_rdi->sections_count; idx += 1) { dsec_base[idx].encoding = RDI_SectionEncoding_Unpacked; dsec_base[idx].off = off; dsec_base[idx].encoded_size = dsec_base[idx].unpacked_size; off += dsec_base[idx].unpacked_size; off += 7; off -= off%8; } } // rjf: decompress sections into new decompressed file buffer if(og_rdi->sections_count != 0) { RDI_Section *src_first = og_rdi->sections; RDI_Section *dst_first = (RDI_Section *)(decompressed_data + dst_header->data_section_off); RDI_Section *src_opl = src_first + og_rdi->sections_count; RDI_Section *dst_opl = dst_first + og_rdi->sections_count; for(RDI_Section *src = src_first, *dst = dst_first; src < src_opl && dst < dst_opl; src += 1, dst += 1) { rr_lzb_simple_decode((U8*)og_rdi->raw_data + src->off, src->encoded_size, decompressed_data + dst->off, dst->unpacked_size); } } } //- strings RDI_PROC RDI_U8 * rdi_string_from_idx(RDI_Parsed *rdi, RDI_U32 idx, RDI_U64 *len_out) { RDI_U8 *result_base = 0; RDI_U64 result_size = 0; { RDI_U64 string_offs_count = 0; RDI_U32 *string_offs = rdi_table_from_name(rdi, StringTable, &string_offs_count); if(idx < string_offs_count) { RDI_U64 string_data_size = 0; RDI_U8 *string_data = rdi_table_from_name(rdi, StringData, &string_data_size); RDI_U32 off_raw = string_offs[idx]; RDI_U32 opl_raw = string_offs[idx + 1]; RDI_U32 opl = rdi_parse__min(opl_raw, string_data_size); RDI_U32 off = rdi_parse__min(off_raw, opl); result_base = string_data + off; result_size = opl - off; } } *len_out = result_size; return result_base; } //- index runs RDI_PROC RDI_U32* rdi_idx_run_from_first_count(RDI_Parsed *rdi, RDI_U32 raw_first, RDI_U32 raw_count, RDI_U32 *n_out) { RDI_U64 idx_run_count = 0; RDI_U32 *idx_run_data = rdi_table_from_name(rdi, IndexRuns, &idx_run_count); RDI_U32 raw_opl = raw_first + raw_count; RDI_U32 opl = rdi_parse__min(raw_opl, idx_run_count); RDI_U32 first = rdi_parse__min(raw_first, opl); RDI_U32 *result = 0; if(first < idx_run_count) { result = idx_run_data + first; } *n_out = opl - first; return result; } //- line info RDI_PROC void rdi_parsed_from_line_table(RDI_Parsed *rdi, RDI_LineTable *line_table, RDI_ParsedLineTable *out) { //- rjf: extract top-level line info tables RDI_U64 all_voffs_count = 0; RDI_U64 *all_voffs = rdi_table_from_name(rdi, LineInfoVOffs, &all_voffs_count); RDI_U64 *all_voffs_opl = all_voffs + all_voffs_count; RDI_U64 all_lines_count = 0; RDI_Line *all_lines = rdi_table_from_name(rdi, LineInfoLines, &all_lines_count); RDI_Line *all_lines_opl = all_lines + all_lines_count; RDI_U64 all_cols_count = 0; RDI_Column *all_cols = rdi_table_from_name(rdi, LineInfoColumns, &all_cols_count); RDI_Column *all_cols_opl = all_cols + all_cols_count; //- rjf: extract ranges of top-level tables belonging to this line table RDI_U64 *lt_voffs = all_voffs + line_table->voffs_base_idx; RDI_Line *lt_lines = all_lines + line_table->lines_base_idx; RDI_Column *lt_cols = all_cols + line_table->cols_base_idx; RDI_U64 lines_count = line_table->lines_count; RDI_U64 cols_count = line_table->cols_count; if(lt_voffs >= all_voffs_opl) {lt_voffs = all_voffs; lines_count = 0;} if(lt_lines >= all_lines_opl) {lt_lines = all_lines; lines_count = 0;} if(lt_cols >= all_cols_opl) {lt_cols = all_cols; cols_count = 0;} //- rjf: fill result out->voffs = lt_voffs; out->lines = lt_lines; out->cols = lt_cols; out->count = lines_count; out->col_count = cols_count; } RDI_PROC RDI_U64 rdi_line_info_idx_range_from_voff(RDI_ParsedLineTable *line_info, RDI_U64 voff, RDI_U64 *n_out) { RDI_U64 result = 0; RDI_U64 n = 0; if(line_info->count > 0 && line_info->voffs[0] <= voff && voff < line_info->voffs[line_info->count - 1]) { //- rjf: find i such that: (vmap[i].voff <= voff) && (voff < vmap[i + 1].voff) // assuming: (i < j) -> (vmap[i].voff < vmap[j].voff) RDI_U32 first = 0; RDI_U32 opl = line_info->count; for(;;) { RDI_U32 mid = (first + opl)/2; if(line_info->voffs[mid] < voff) { first = mid; } else if(line_info->voffs[mid] > voff) { opl = mid; } else { first = mid; break; } if(opl - first <= 1) { break; } } result = (RDI_U64)first; //- rjf: scan leftward, to find shallowest line info matching this voff for(;result != 0;) { if(line_info->voffs[result-1] == voff) { result -= 1; } else { break; } } //- rjf: scan rightward, to count # of line info with this voff for(RDI_U64 idx = result; idx < line_info->count; idx += 1) { if(line_info->voffs[idx] == voff) { n += 1; } else { break; } } } if(n_out) { *n_out = n; } return result; } RDI_PROC RDI_U64 rdi_line_info_idx_from_voff(RDI_ParsedLineTable *line_info, RDI_U64 voff) { RDI_U64 count = 0; RDI_U64 result = rdi_line_info_idx_range_from_voff(line_info, voff, &count); for(RDI_S64 idx = count-1; idx >= 0; idx -= 1) { if(result + idx < line_info->count && line_info->lines[result+idx].file_idx != 0) { result += idx; break; } } return result; } RDI_PROC void rdi_parsed_from_source_line_map(RDI_Parsed *rdi, RDI_SourceLineMap *map, RDI_ParsedSourceLineMap *out) { //- rjf: extract top-level line info tables RDI_U64 all_nums_count = 0; RDI_U32 *all_nums = rdi_table_from_name(rdi, SourceLineMapNumbers, &all_nums_count); RDI_U32 *all_nums_opl = all_nums + all_nums_count; RDI_U64 all_rngs_count = 0; RDI_U32 *all_rngs = rdi_table_from_name(rdi, SourceLineMapRanges, &all_rngs_count); RDI_U32 *all_rngs_opl = all_rngs + all_rngs_count; RDI_U64 all_voffs_count = 0; RDI_U64 *all_voffs = rdi_table_from_name(rdi, SourceLineMapVOffs, &all_voffs_count); RDI_U64 *all_voffs_opl = all_voffs + all_voffs_count; //- rjf: extract ranges of top-level tables belonging to this line map RDI_U32 *map_nums = all_nums + map->line_map_nums_base_idx; RDI_U32 *map_rngs = all_rngs + map->line_map_range_base_idx; RDI_U64 *map_voffs= all_voffs+ map->line_map_voff_base_idx; RDI_U64 lines_count = (RDI_U64)map->line_count; RDI_U64 voffs_count = (RDI_U64)map->voff_count; if(map_nums >= all_nums_opl) {map_nums = all_nums; lines_count = 0;} if(map_rngs >= all_rngs_opl) {map_rngs = all_rngs; lines_count = 0;} if(map_voffs>= all_voffs_opl){map_voffs= all_voffs;voffs_count = 0;} //- rjf: fill result out->nums = map_nums; out->ranges = map_rngs; out->voffs = map_voffs; out->count = lines_count; out->voff_count = voffs_count; } RDI_PROC RDI_U64 * rdi_line_voffs_from_num(RDI_ParsedSourceLineMap *map, RDI_U32 linenum, RDI_U32 *n_out) { RDI_U64 *result = 0; *n_out = 0; RDI_U32 closest_i = 0; if(map->count > 0 && map->nums[0] <= linenum) { // assuming: (i < j) -> (nums[i] < nums[j]) // find i such that: (nums[i] <= linenum) && (linenum < nums[i + 1]) RDI_U32 *nums = map->nums; RDI_U32 first = 0; RDI_U32 opl = map->count; for(;;) { RDI_U32 mid = (first + opl)/2; if(nums[mid] < linenum) { first = mid; } else if(nums[mid] > linenum) { opl = mid; } else { first = mid; break; } if(opl - first <= 1) { break; } } closest_i = first; } // round up instead of down if possible if(closest_i + 1 < map->count && map->nums[closest_i] < linenum) { closest_i += 1; } // set result if possible if(closest_i < map->count) { RDI_U32 first = map->ranges[closest_i]; RDI_U32 opl = map->ranges[closest_i + 1]; if(opl <= map->voff_count) { result = map->voffs + first; *n_out = opl - first; } } return result; } //- vmap lookups RDI_PROC RDI_U64 rdi_vmap_idx_from_voff(RDI_VMapEntry *vmap, RDI_U64 vmap_count, RDI_U64 voff) { RDI_U64 result = 0; if(vmap_count > 0 && vmap[0].voff <= voff && voff < vmap[vmap_count - 1].voff) { // assuming: (i < j) -> (vmap[i].voff < vmap[j].voff) // find i such that: (vmap[i].voff <= voff) && (voff < vmap[i + 1].voff) RDI_U32 first = 0; RDI_U32 opl = vmap_count; for(;;) { RDI_U32 mid = (first + opl)/2; if(vmap[mid].voff < voff) { first = mid; } else if(vmap[mid].voff > voff) { opl = mid; } else { first = mid; break; } if(opl - first <= 1) { break; } } result = (RDI_U64)vmap[first].idx; } return result; } RDI_PROC RDI_U64 rdi_vmap_idx_from_section_kind_voff(RDI_Parsed *rdi, RDI_SectionKind kind, RDI_U64 voff) { RDI_U64 vmaps_count = 0; RDI_VMapEntry *vmaps = rdi_section_raw_table_from_kind(rdi, kind, &vmaps_count); RDI_U64 result = rdi_vmap_idx_from_voff(vmaps, vmaps_count, voff); return result; } //- name maps RDI_PROC void rdi_parsed_from_name_map(RDI_Parsed *rdi, RDI_NameMap *mapptr, RDI_ParsedNameMap *out) { out->buckets = 0; out->bucket_count = 0; if(mapptr != 0) { RDI_U64 all_buckets_count = 0; RDI_NameMapBucket *all_buckets = rdi_table_from_name(rdi, NameMapBuckets, &all_buckets_count); RDI_U64 all_nodes_count = 0; RDI_NameMapNode *all_nodes = rdi_table_from_name(rdi, NameMapNodes, &all_nodes_count); out->buckets = all_buckets+mapptr->bucket_base_idx; out->nodes = all_nodes+mapptr->node_base_idx; out->bucket_count = mapptr->bucket_count; out->node_count = mapptr->node_count; if(mapptr->bucket_base_idx > all_buckets_count) { out->buckets = 0; out->bucket_count = 0; } if(mapptr->node_base_idx > all_nodes_count) { out->nodes = 0; out->node_count = 0; } } } RDI_PROC RDI_NameMapNode* rdi_name_map_lookup(RDI_Parsed *p, RDI_ParsedNameMap *map, RDI_U8 *str, RDI_U64 len) { RDI_NameMapNode *result = 0; if(map->bucket_count > 0) { RDI_NameMapBucket *buckets = map->buckets; RDI_U64 bucket_count = map->bucket_count; RDI_U64 hash = rdi_hash(str, len); RDI_U64 bucket_index = hash%bucket_count; RDI_NameMapBucket *bucket = map->buckets + bucket_index; RDI_NameMapNode *node = map->nodes + bucket->first_node; RDI_NameMapNode *node_opl = node + bucket->node_count; for(;node < node_opl; node += 1) { // extract a string from this node RDI_U64 nlen = 0; RDI_U8 *nstr = rdi_string_from_idx(p, node->string_idx, &nlen); // compare this to the needle string RDI_S32 match = 0; if(nlen == len) { RDI_U8 *a = str; RDI_U8 *aopl = str + len; RDI_U8 *b = nstr; for (;a < aopl && *a == *b; a += 1, b += 1); match = (a == aopl); } // stop with a matching node in result if(match) { result = node; break; } } } return result; } RDI_PROC RDI_U32* rdi_matches_from_map_node(RDI_Parsed *p, RDI_NameMapNode *node, RDI_U32 *n_out) { RDI_U32 *result = 0; *n_out = 0; if(node != 0) { if(node->match_count == 1) { result = &node->match_idx_or_idx_run_first; *n_out = 1; } else { result = rdi_idx_run_from_first_count(p, node->match_idx_or_idx_run_first, node->match_count, n_out); } } return result; } //////////////////////////////// //~ High-Level Composite Lookup Functions //- procedures RDI_PROC RDI_Procedure * rdi_procedure_from_name(RDI_Parsed *rdi, RDI_U8 *name, RDI_U64 name_size) { RDI_NameMap *map = rdi_element_from_name_idx(rdi, NameMaps, RDI_NameMapKind_Procedures); RDI_ParsedNameMap map_parsed = {0}; rdi_parsed_from_name_map(rdi, map, &map_parsed); RDI_NameMapNode *node = rdi_name_map_lookup(rdi, &map_parsed, name, name_size); RDI_U32 id_count = 0; RDI_U32 *ids = rdi_matches_from_map_node(rdi, node, &id_count); RDI_U32 procedure_idx = 0; if(id_count > 0) { procedure_idx = ids[0]; } RDI_Procedure *procedure = rdi_element_from_name_idx(rdi, Procedures, procedure_idx); return procedure; } RDI_PROC RDI_Procedure * rdi_procedure_from_name_cstr(RDI_Parsed *rdi, char *cstr) { RDI_Procedure *result = rdi_procedure_from_name(rdi, (RDI_U8 *)cstr, rdi_cstring_length(cstr)); return result; } RDI_PROC RDI_U8 * rdi_name_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure, RDI_U64 *len_out) { return rdi_string_from_idx(rdi, procedure->name_string_idx, len_out); } RDI_PROC RDI_Scope * rdi_root_scope_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure) { RDI_Scope *scope = rdi_element_from_name_idx(rdi, Scopes, procedure->root_scope_idx); return scope; } RDI_PROC RDI_UDT * rdi_container_udt_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure) { RDI_U64 idx = 0; if(procedure->link_flags & RDI_LinkFlag_TypeScoped) { idx = procedure->container_idx; } RDI_UDT *udt = rdi_element_from_name_idx(rdi, UDTs, idx); return udt; } RDI_PROC RDI_Procedure * rdi_container_procedure_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure) { RDI_U64 idx = 0; if(procedure->link_flags & RDI_LinkFlag_ProcScoped) { idx = procedure->container_idx; } RDI_Procedure *container_procedure = rdi_element_from_name_idx(rdi, Procedures, idx); return container_procedure; } RDI_PROC RDI_U64 rdi_first_voff_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure) { RDI_Scope *scope = rdi_root_scope_from_procedure(rdi, procedure); RDI_U64 result = rdi_first_voff_from_scope(rdi, scope); return result; } RDI_PROC RDI_U64 rdi_opl_voff_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure) { RDI_Scope *scope = rdi_root_scope_from_procedure(rdi, procedure); RDI_U64 result = rdi_opl_voff_from_scope(rdi, scope); return result; } RDI_PROC RDI_Procedure * rdi_procedure_from_voff(RDI_Parsed *rdi, RDI_U64 voff) { RDI_Scope *scope = rdi_scope_from_voff(rdi, voff); RDI_Procedure *procedure = rdi_procedure_from_scope(rdi, scope); return procedure; } //- scopes RDI_PROC RDI_U64 rdi_first_voff_from_scope(RDI_Parsed *rdi, RDI_Scope *scope) { RDI_U64 *voffs = rdi_element_from_name_idx(rdi, ScopeVOffData, scope->voff_range_first); RDI_U64 result = *voffs; return result; } RDI_PROC RDI_U64 rdi_opl_voff_from_scope(RDI_Parsed *rdi, RDI_Scope *scope) { RDI_U64 result = 0; if(scope->voff_range_opl != 0) { RDI_U64 *voffs = rdi_element_from_name_idx(rdi, ScopeVOffData, scope->voff_range_opl-1); result = *voffs; } return result; } RDI_PROC RDI_Scope * rdi_scope_from_voff(RDI_Parsed *rdi, RDI_U64 voff) { RDI_U32 idx = rdi_vmap_idx_from_section_kind_voff(rdi, RDI_SectionKind_ScopeVMap, voff); RDI_Scope *scope = rdi_element_from_name_idx(rdi, Scopes, idx); return scope; } RDI_PROC RDI_Scope * rdi_parent_from_scope(RDI_Parsed *rdi, RDI_Scope *scope) { RDI_Scope *parent = rdi_element_from_name_idx(rdi, Scopes, scope->parent_scope_idx); return parent; } RDI_PROC RDI_Procedure * rdi_procedure_from_scope(RDI_Parsed *rdi, RDI_Scope *scope) { RDI_Procedure *procedure = rdi_element_from_name_idx(rdi, Procedures, scope->proc_idx); return procedure; } RDI_PROC RDI_InlineSite * rdi_inline_site_from_scope(RDI_Parsed *rdi, RDI_Scope *scope) { RDI_InlineSite *inline_site = rdi_element_from_name_idx(rdi, InlineSites, scope->inline_site_idx); return inline_site; } //- global variables RDI_PROC RDI_GlobalVariable * rdi_global_variable_from_voff(RDI_Parsed *rdi, RDI_U64 voff) { RDI_U32 idx = rdi_vmap_idx_from_section_kind_voff(rdi, RDI_SectionKind_GlobalVMap, voff); RDI_GlobalVariable *gvar = rdi_element_from_name_idx(rdi, GlobalVariables, idx); return gvar; } //- units RDI_PROC RDI_Unit * rdi_unit_from_voff(RDI_Parsed *rdi, RDI_U64 voff) { RDI_U32 unit_idx = rdi_vmap_idx_from_section_kind_voff(rdi, RDI_SectionKind_UnitVMap, voff); RDI_Unit *unit = rdi_element_from_name_idx(rdi, Units, unit_idx); return unit; } RDI_PROC RDI_LineTable * rdi_line_table_from_unit(RDI_Parsed *rdi, RDI_Unit *unit) { RDI_LineTable *line_table = rdi_element_from_name_idx(rdi, LineTables, unit->line_table_idx); return line_table; } //- line info RDI_PROC RDI_Line rdi_line_from_voff(RDI_Parsed *rdi, RDI_U64 voff) { RDI_Unit *unit = rdi_unit_from_voff(rdi, voff); RDI_LineTable *line_table = rdi_line_table_from_unit(rdi, unit); RDI_Line line = rdi_line_from_line_table_voff(rdi, line_table, voff); return line; } RDI_PROC RDI_Line rdi_line_from_line_table_voff(RDI_Parsed *rdi, RDI_LineTable *line_table, RDI_U64 voff) { RDI_ParsedLineTable parsed = {0}; rdi_parsed_from_line_table(rdi, line_table, &parsed); RDI_U64 line_info_idx = rdi_line_info_idx_from_voff(&parsed, voff); RDI_Line result = {0}; if(line_info_idx < parsed.count) { result = parsed.lines[line_info_idx]; } return result; } RDI_PROC RDI_SourceFile * rdi_source_file_from_line(RDI_Parsed *rdi, RDI_Line *line) { RDI_SourceFile *result = rdi_element_from_name_idx(rdi, SourceFiles, line->file_idx); return result; } //- source files RDI_PROC RDI_SourceFile * rdi_source_file_from_normal_path(RDI_Parsed *rdi, RDI_U8 *name, RDI_U64 name_size) { RDI_NameMap *map = rdi_element_from_name_idx(rdi, NameMaps, RDI_NameMapKind_NormalSourcePaths); RDI_ParsedNameMap map_parsed = {0}; rdi_parsed_from_name_map(rdi, map, &map_parsed); RDI_NameMapNode *node = rdi_name_map_lookup(rdi, &map_parsed, name, name_size); RDI_U32 id_count = 0; RDI_U32 *ids = rdi_matches_from_map_node(rdi, node, &id_count); RDI_U32 file_idx = 0; if(id_count > 0) { file_idx = ids[0]; } RDI_SourceFile *file = rdi_element_from_name_idx(rdi, SourceFiles, file_idx); return file; } RDI_PROC RDI_SourceFile * rdi_source_file_from_normal_path_cstr(RDI_Parsed *rdi, char *cstr) { RDI_SourceFile *result = rdi_source_file_from_normal_path(rdi, (RDI_U8 *)cstr, rdi_cstring_length(cstr)); return result; } RDI_PROC RDI_U8 * rdi_normal_path_from_source_file(RDI_Parsed *rdi, RDI_SourceFile *src_file, RDI_U64 *len_out) { return rdi_string_from_idx(rdi, src_file->normal_full_path_string_idx, len_out); } RDI_PROC RDI_FilePathNode * rdi_file_path_node_from_source_file(RDI_Parsed *rdi, RDI_SourceFile *src_file) { RDI_FilePathNode *result = rdi_element_from_name_idx(rdi, FilePathNodes, src_file->file_path_node_idx); return result; } RDI_PROC RDI_SourceLineMap * rdi_source_line_map_from_source_file(RDI_Parsed *rdi, RDI_SourceFile *src_file) { RDI_SourceLineMap *result = rdi_element_from_name_idx(rdi, SourceLineMaps, src_file->source_line_map_idx); return result; } RDI_PROC RDI_U64 rdi_first_voff_from_source_file_line_num(RDI_Parsed *rdi, RDI_SourceFile *src_file, RDI_U32 line_num) { RDI_SourceLineMap *source_line_map = rdi_source_line_map_from_source_file(rdi, src_file); RDI_U64 voff = rdi_first_voff_from_source_line_map_num(rdi, source_line_map, line_num); return voff; } //- source line maps RDI_PROC RDI_U64 rdi_first_voff_from_source_line_map_num(RDI_Parsed *rdi, RDI_SourceLineMap *map, RDI_U32 line_num) { RDI_ParsedSourceLineMap parsed = {0}; rdi_parsed_from_source_line_map(rdi, map, &parsed); RDI_U32 all_voffs_count = 0; RDI_U64 *all_voffs = rdi_line_voffs_from_num(&parsed, line_num, &all_voffs_count); RDI_U64 voff = 0; if(all_voffs_count != 0) { voff = all_voffs[0]; } return voff; } //- file path nodes RDI_PROC RDI_FilePathNode * rdi_parent_from_file_path_node(RDI_Parsed *rdi, RDI_FilePathNode *node) { RDI_FilePathNode *result = rdi_element_from_name_idx(rdi, FilePathNodes, node->parent_path_node); return result; } RDI_PROC RDI_U8 * rdi_name_from_file_path_node(RDI_Parsed *rdi, RDI_FilePathNode *node, RDI_U64 *len_out) { return rdi_string_from_idx(rdi, node->name_string_idx, len_out); } //////////////////////////////// //~ Parser Helpers RDI_PROC RDI_U64 rdi_cstring_length(char *cstr) { RDI_U64 result = 0; for(;cstr[result] != 0; result += 1){} return result; } RDI_PROC RDI_U64 rdi_size_from_bytecode_stream(RDI_U8 *ptr, RDI_U8 *opl) { RDI_U64 bytecode_size = 0; RDI_U8 *off_first = ptr + sizeof(RDI_LocationKind); for(RDI_U8 *off = off_first, *next_off = opl; off < opl; off = next_off) { RDI_U8 op = *off; if(op == 0) { break; } RDI_U16 ctrlbits = rdi_eval_op_ctrlbits_table[op]; RDI_U32 p_size = RDI_DECODEN_FROM_CTRLBITS(ctrlbits); bytecode_size += (1 + p_size); next_off = (off + 1 + p_size); } return bytecode_size; } //////////////////////////////// //~ Compression/Decompression Implementation #include //------------------------------------------------- // UINTr = int the size of a register #ifdef __RAD64REGS__ #define RAD_UINTr RAD_U64 #define RAD_SINTr RAD_S64 #define readR read64 #define writeR write64 #define rrClzBytesR rrClzBytes64 #define rrCtzBytesR rrCtzBytes64 #else #define RAD_UINTr RAD_U32 #define RAD_SINTr RAD_S32 #define readR read32 #define writeR write32 #define rrClzBytesR rrClzBytes32 #define rrCtzBytesR rrCtzBytes32 #endif typedef RAD_SINTr SINTr; typedef RAD_UINTr UINTr; #define OOINLINE RADFORCEINLINE #define if_unlikely(exp) if ( RAD_UNLIKELY( exp ) ) #define if_likely( exp) if ( RAD_LIKELY( exp ) ) // Raw byte IO #if defined(__RADARM__) && !defined(__RAD64__) && defined(__GNUC__) // older GCCs don't turn the memcpy variant into loads/stores, but // they do support this: typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unaligned_type; static inline U16 read16(const void *ptr) { return ((const unaligned_type *)ptr)->u16; } static inline void write16(void *ptr, U16 x) { ((unaligned_type *)ptr)->u16 = x; } static inline U32 read32(const void *ptr) { return ((const unaligned_type *)ptr)->u32; } static inline void write32(void *ptr, U32 x) { ((unaligned_type *)ptr)->u32 = x; } static inline U64 read64(const void *ptr) { return ((const unaligned_type *)ptr)->u64; } static inline void write64(void *ptr, U64 x) { ((unaligned_type *)ptr)->u64 = x; } #else // most C compilers we target are smart enough to turn this into single loads/stores static inline U16 read16(const void *ptr) { U16 x; memcpy(&x, ptr, sizeof(x)); return x; } static inline void write16(void *ptr, U16 x) { memcpy(ptr, &x, sizeof(x)); } static inline U32 read32(const void *ptr) { U32 x; memcpy(&x, ptr, sizeof(x)); return x; } static inline void write32(void *ptr, U32 x) { memcpy(ptr, &x, sizeof(x)); } static inline U64 read64(const void *ptr) { U64 x; memcpy(&x, ptr, sizeof(x)); return x; } static inline void write64(void *ptr, U64 x) { memcpy(ptr, &x, sizeof(x)); } #endif #define RR_PUT16_LE_UNALIGNED(ptr,val) RR_PUT16_LE(ptr,val) #define RR_PUT16_LE_UNALIGNED_OFFSET(ptr,val,offset) RR_PUT16_LE_OFFSET(ptr,val,offset) //=========================================================================== static RADINLINE SINTa rrPtrDiffV(void * end, void *start) { return (SINTa)( ((char *)(end)) - ((char *)(start)) ); } // helper function to show I really am intending to put a pointer difference in an int : static RADINLINE SINTa rrPtrDiff(SINTa val) { return val; } static RADINLINE S32 rrPtrDiff32(SINTa val) { S32 ret = (S32) val; RR_ASSERT( (SINTa)ret == val ); return ret; } static RADINLINE SINTr rrPtrDiffR(SINTa val) { SINTr ret = (SINTr) val; RR_ASSERT( (SINTa)ret == val ); return ret; } //================================================================= #define LZB_LRL_BITS 4 #define LZB_LRL_ESCAPE 15 #define LZB_ML_BITS 4 #define LZB_MLCONTROL_ESCAPE 15 #define LZB_SLIDING_WINDOW_POW2 16 #define LZB_SLIDING_WINDOW_SIZE (1<>= 6; \ if ( val < 128 ) *cp++ = (U8) val; \ else { val -= 128; *cp++ = 128 + (U8) ( val&0x7F); val >>= 7; \ if ( val < 128 ) *cp++ = (U8) val; \ else { val -= 128; *cp++ = 128 + (U8) ( val&0x7F); val >>= 7; \ if ( val < 128 ) *cp++ = (U8) val; \ else { val -= 128; *cp++ = 128 + (U8) ( val&0x7F); val >>= 7; *cp++ = (U8) val; } } } } \ } while(0) // max bytes consumed: 5 #define LZB_AddExcessBW(cp,val) do { U32 b = *cp++; \ if ( b < 192 ) val += b; \ else { val += 192; val += (b-192); b = *cp++; \ val += (b<<6); if ( b >= 128 ) { b = *cp++; \ val += (b<<13); if ( b >= 128 ) { b = *cp++; \ val += (b<<20); if ( b >= 128 ) { b = *cp++; \ val += (b<<27); } } } } \ } while(0) #define LZB_PutExcessLRL(cp,val) LZB_PutExcessBW(cp,val) #define LZB_PutExcessML(cp,val) LZB_PutExcessBW(cp,val) #define LZB_AddExcessLRL(cp,val) LZB_AddExcessBW(cp,val) #define LZB_AddExcessML(cp,val) LZB_AddExcessBW(cp,val) //============================================================================= // match copies : // used for LRL : static OOINLINE void copy_no_overlap_long(U8 * to, const U8 * from, SINTr length) { for(int i=0;i= LZB_MML && ml < LZB_MATCHLEN_ESCAPE ); // overlap // @@ err not awesome to[0] = from[0]; to[1] = from[1]; to[2] = from[2]; to[3] = from[3]; to[4] = from[4]; to[5] = from[5]; to[6] = from[6]; to[7] = from[7]; if ( ml > 8 ) { to += 8; from += 8; ml -= 8; // max of 10 more while(ml--) { *to++ = *from++; } } } static OOINLINE void copy_match_memset(U8 * to, int c, SINTr ml) { RR_ASSERT( ml >= 4 ); U32 four = c * 0x01010101; U8 * end = to + ml; write32(to, four); to += 4; while(to>4); // copy 4 literals speculatively : write32( rp , read32(cp) ); //RR_ASSERT( lrl >= 8 || ml_control >= 8 ); if ( lrl > 4 ) { // if lrl was <= 8 we did it, else need this : if_unlikely ( lrl > 8 ) { if_unlikely ( lrl >= LZB_LRL_ESCAPE ) { LZB_AddExcessLRL( cp, lrl ); // hide the EOF check here ? // has to be after the GetExcess if_unlikely ( rp+lrl >= rpEnd ) { RR_ASSERT( rp+lrl == rpEnd ); copy_no_overlap_nooverrun(rp,cp,lrl); rp += lrl; cp += lrl; break; } else { // total undo of the previous copy copy_no_overlap_long(rp,cp,lrl); } } else // > 8 but not 0xF { // hide the EOF check here ? if_unlikely ( rp+lrl >= rpEnd ) { if ( lrl == 9 ) { // may be a false 9 lrl = rrPtrDiff32( rpEnd - rp ); } RR_ASSERT( rp+lrl == rpEnd ); copy_no_overlap_nooverrun(rp,cp,lrl); rp += lrl; cp += lrl; break; } else { write32( rp+4 , read32(cp+4) ); // put 8 more : write64( (rp+8) , read64((cp+8)) ); } } } else { write32( rp+4 , read32(cp+4) ); } } rp += lrl; cp += lrl; RR_ASSERT( rp+LZB_MML <= rpEnd ); UINTr ml = ml_control + LZB_MML; // speculatively grab offset but don't advance cp yet UINTr off = RR_GET16_LE_UNALIGNED(cp); if ( ml_control <= 8 ) { cp += 2; // consume offset const U8 * match = rp - off; RR_ASSERT( ml <= 12 ); write64( rp , read64(match) ); write32( rp+8 , read32(match+8) ); rp += ml; continue; } else { if_likely( ml_control < LZB_MLCONTROL_ESCAPE ) // short match { cp += 2; // consume offset const U8 * match = rp - off; RR_ASSERT( off >= 8 || ml <= off ); write64( rp , read64(match) ); write64( rp+8 , read64(match+8) ); if ( ml > 16 ) { write16( rp+16, read16(match+16) ); } } else { // get 1-byte excess code UINTr excesslow = off&127; cp++; // consume 1 //if ( excess1 >= 128 ) if ( off & 128 ) { ml_control = excesslow >> 3; ml = ml_control + LZB_MML; if ( ml_control == 0xF ) { // get more ml LZB_AddExcessML( cp, ml ); } UINTr myoff = off & 7; // low offset, can't do 8-byte grabs if ( myoff == 1 ) { int c = rp[-1]; copy_match_memset(rp,c,ml); } else { // shit but whatever, very rare for(UINTr i=0;i>13); return h; } #define HashMatchFinder_Hash32 hmf_hash4_32 //================================================================================= #define LZB_Hash4 hmf_hash4_32 static RADINLINE U32 LZB_SecondHash4(U32 be4) { const U32 m = 0x5bd1e995; U32 h = be4 * m; h += (h>>11); return h; } //============================================= static int RADFORCEINLINE GetNumBytesZeroNeverAllR(UINTr x) { RR_ASSERT( x != 0 ); #if defined(__RADBIGENDIAN__) // big endian, so earlier bytes are at the top int nb = (int)rrClzBytesR(x); #elif defined(__RADLITTLEENDIAN__) // little endian, so earlier bytes are at the bottom int nb = (int)rrCtzBytesR(x); #else #error wtf no endian set #endif RR_ASSERT( nb >= 0 && nb < (int)sizeof(UINTr) ); return nb; } //=============================== static RADFORCEINLINE U8 * LZB_Output(U8 * cp, S32 lrl, const U8 * literals, S32 matchlen , S32 mo ) { RR_ASSERT( lrl >= 0 ); RR_ASSERT( matchlen >= LZB_MML ); RR_ASSERT( mo > 0 && mo <= LZB_MAX_OFFSET ); //rrprintf("[%3d][%3d][%7d]\n",lrl,ml,mo); S32 sendml = matchlen - LZB_MML; U32 ml_in_control = RR_MIN(sendml,LZB_MLCONTROL_ESCAPE); if ( mo >= 8 ) // no overlap { if ( lrl < LZB_LRL_ESCAPE ) { U32 control = lrl | (ml_in_control<<4); *cp++ = (U8) control; write64(cp, read64(literals)); if ( lrl > 8 ) { write64(cp+8, read64(literals+8)); } cp += lrl; } else { U32 control = LZB_LRL_ESCAPE | (ml_in_control<<4); *cp++ = (U8) control; U32 lrl_excess = lrl - LZB_LRL_ESCAPE; LZB_PutExcessLRL(cp,lrl_excess); // @@ ? is this okay for overrun ? lz_copysteptoend_overrunok(cp,literals,lrl); } if ( ml_in_control < LZB_MLCONTROL_ESCAPE ) { RR_ASSERT( (U16)(mo) == mo ); RR_PUT16_LE_UNALIGNED(cp,(U16)(mo)); cp += 2; } else { U32 ml_excess = sendml - LZB_MLCONTROL_ESCAPE; // put special first byte, then offset, then remainder if ( ml_excess < 127 ) { *cp++ = (U8)ml_excess; RR_ASSERT( (U16)(mo) == mo ); RR_PUT16_LE_UNALIGNED(cp,(U16)(mo)); cp += 2; } else { *cp++ = (U8)127; RR_ASSERT( (U16)(mo) == mo ); RR_PUT16_LE_UNALIGNED(cp,(U16)(mo)); cp += 2; ml_excess -= 127; LZB_PutExcessML(cp,ml_excess); } } } else { U32 lrl_in_control = RR_MIN(lrl,LZB_LRL_ESCAPE); // overlap case U32 control = (lrl_in_control) | (LZB_MLCONTROL_ESCAPE<<4); *cp++ = (U8) control; if ( lrl_in_control == LZB_LRL_ESCAPE ) { U32 lrl_excess = lrl - LZB_LRL_ESCAPE; LZB_PutExcessLRL(cp,lrl_excess); } lz_copysteptoend_overrunok(cp,literals,lrl); //cp += lrl; // special excess1 : UINTr excess1 = 128 + (ml_in_control<<3) + mo; RR_ASSERT( excess1 < 256 ); *cp++ = (U8)excess1; if ( ml_in_control == LZB_MLCONTROL_ESCAPE ) { U32 ml_excess = sendml - LZB_MLCONTROL_ESCAPE; LZB_PutExcessML(cp,ml_excess); } } return cp; } #if LZB_FORCELASTLRL9 static RADINLINE U8 * LZB_OutputLast(U8 * cp, S32 lrl, const U8 * literals ) { RR_ASSERT( lrl >= 0 ); //U32 ml = 0; //U32 mo = 0; U32 lrl_in_control = RR_MIN(lrl,LZB_LRL_ESCAPE); #if LZB_END_WITH_LITERALS // lrl_in_control must be at least 9 lrl_in_control = RR_MAX(lrl_in_control,9); #endif U32 control = lrl_in_control; *cp++ = (U8) control; if ( lrl_in_control == LZB_LRL_ESCAPE ) { U32 lrl_excess = lrl - LZB_LRL_ESCAPE; LZB_PutExcessLRL(cp,lrl_excess); } memmove(cp,literals,lrl); cp += lrl; return cp; } #else static RADINLINE U8 * LZB_OutputLast(U8 * cp, S32 lrl, const U8 * literals ) { cp = LZB_Output(cp,lrl,literals,LZB_MML,1); // remove the offset we put : cp -= 2; return cp; } #endif //=============================================================== static void rr_lzb_simple_context_init(rr_lzb_simple_context * ctx) //, const void * base) { RR_ASSERT( ctx->m_tableSizeBits >= 12 && ctx->m_tableSizeBits <= 24 ); memset(ctx->m_hashTable,0,sizeof(U16)*((SINTa)1<m_tableSizeBits)); } //=============================================================== /* #define FAST_HASH_DEPTH_SHIFT (1) // more depth = more & more compression, #define DO_FAST_2ND_HASH // rate= 30.69 mb/s , 15451369 <- turning this off is the best way to get more speed and less compression /*/ #define FAST_HASH_DEPTH_SHIFT (0) #define DO_FAST_2ND_HASH /**/ // lzt99, 24700820, 15475520, 16677179 //encode only : 0.880 seconds, 1.62 b/hc, rate= 28.08 mb/s //#define FAST_HASH_DEPTH_SHIFT (1) // more depth = more & more compression, but slower #define DO_FAST_UPDATE_MATCH_HASHES 1 // helps compression a lot , like 0.30 //#define DO_FAST_UPDATE_MATCH_HASHES 2 // helps compression a lot , like 0.30 #define DO_FAST_LAZY_MATCH // also helps a lot , like 0.15 #define DO_FAST_HASH_DWORD 1 #define FAST_MULTISTEP_LITERALS_SHIFT (5) //----------------------- // derived : /* #define FAST_HASH_BITS (FAST_HASH_TOTAL_BITS-FAST_HASH_DEPTH_SHIFT) #define FAST_HASH_SIZE (1< 1 #define FAST_HASH_INDEX(h,d) ( ((h)< 1 int hashCycle = 0; #endif U16 * hashTable16 = fh->m_hashTable; int hashTableSizeBits = fh->m_tableSizeBits; U32 hash_table_mask = (U32)((1UL<<(hashTableSizeBits - FAST_HASH_DEPTH_SHIFT)) - 1); const U8 * zeroPosPtr = (const U8 *)raw; // first byte is always a literal rp++; for(;;) { S32 matchOff; UINTr failedMatches = (1<= 0 ); #ifdef DO_FAST_2ND_HASH hash2 = ( LZB_SecondHash4(rp32) ) & hash_table_mask; #endif #if FAST_HASH_DEPTH > 1 for(int d=0;d= 0 ); hashrp = rp - matchOff; //if ( matchOff <= LZB_MAX_OFFSET ) RR_ASSERT( matchOff <= LZB_MAX_OFFSET ); { const U32 hashrp32 = read32(hashrp); if ( rp32 == hashrp32 && matchOff != 0 ) { goto found_match; } } } #ifdef DO_FAST_2ND_HASH #if FAST_HASH_DEPTH > 1 for(int d=0;d= 0 ); hashrp = rp - matchOff; RR_ASSERT( matchOff <= LZB_MAX_OFFSET ); { const U32 hashrp32 = read32(hashrp); if ( rp32 == hashrp32 && matchOff != 0 ) { goto found_match; } } } #endif //--------------------------- // update hash : hashTable16[ FAST_HASH_INDEX(hash,hashCycle) ] = (U16) curpos; #ifdef DO_FAST_2ND_HASH // do NOT step hashCycle ! //hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; hashTable16[ FAST_HASH_INDEX(hash2,hashCycle) ] = (U16) curpos; #endif #if FAST_HASH_DEPTH > 1 hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; #endif UINTr stepLiterals = (failedMatches>>FAST_MULTISTEP_LITERALS_SHIFT); RR_ASSERT( stepLiterals >= 1 ); ++failedMatches; rp += stepLiterals; if ( rp >= rpEndSafe ) goto done; rp32 = read32(rp); hash = FAST_HASH_FUNC(rp, rp32 ); } //------------------------------- found_match: // found something //------------------------- // update hash now so lazy can see it : #if 1 // pretty important to compression hashTable16[ FAST_HASH_INDEX(hash,hashCycle) ] = (U16) curpos; #ifdef DO_FAST_2ND_HASH // do NOT step hashCycle ! //hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; hashTable16[ FAST_HASH_INDEX(hash2,hashCycle) ] = (U16) curpos; #endif #if FAST_HASH_DEPTH > 1 hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; #endif #endif //----------------------------------- const U8 * match_start = rp; rp += 4; while( rp < rpEndSafe ) { UINTr big1 = readR(rp); UINTr big2 = readR(rp-matchOff); if ( big1 == big2 ) { rp += RAD_PTRBYTES; continue; } else { rp += GetNumBytesZeroNeverAllR(big1^big2); break; } } rp = RR_MIN(rp,rpMatchEnd); //------------------------------- // rp is now at the *end* of the match //------------------------------- // check lazy match too #ifdef DO_FAST_LAZY_MATCH if (rp< rpEndSafe) { const U8 * lazyrp = match_start + 1; //SINTa lazypos = rrPtrDiff(lazyrp - zeroPosPtr); SINTa lazypos = curpos + 1; RR_ASSERT( lazypos == rrPtrDiff(lazyrp - zeroPosPtr) ); U32 lazyrp32 = read32(lazyrp); const U8 * lazyhashrp; SINTa lazymatchOff; U32 lazyHash = FAST_HASH_FUNC(lazyrp, lazyrp32 ); #ifdef DO_FAST_2ND_HASH U32 lazyhash2 = LZB_SecondHash4(lazyrp32) & hash_table_mask; #endif #if FAST_HASH_DEPTH > 1 for(int d=0;d= 0 ); RR_ASSERT( lazymatchOff <= LZB_MAX_OFFSET ); { lazyhashrp = lazyrp - lazymatchOff; const U32 hashrp32 = read32(lazyhashrp); if ( lazyrp32 == hashrp32 && lazymatchOff != 0 ) { goto lazy_found_match; } } } #ifdef DO_FAST_2ND_HASH #if FAST_HASH_DEPTH > 1 for(int d=0;d= 0 ); RR_ASSERT( lazymatchOff <= LZB_MAX_OFFSET ); { lazyhashrp = lazyrp - lazymatchOff; const U32 hashrp32 = read32(lazyhashrp); if ( lazyrp32 == hashrp32 && lazymatchOff != 0 ) { goto lazy_found_match; } } } #endif if ( 0 ) { lazy_found_match: lazyrp += 4; while( lazyrp < rpEndSafe ) { UINTr big1 = readR(lazyrp); UINTr big2 = readR(lazyrp-lazymatchOff); if ( big1 == big2 ) { lazyrp += RAD_PTRBYTES; continue; } else { lazyrp += GetNumBytesZeroNeverAllR(big1^big2); break; } } lazyrp = RR_MIN(lazyrp,rpMatchEnd); //S32 lazymatchLen = rrPtrDiff32( lazyrp - (match_start+1) ); //RR_ASSERT( lazymatchLen >= 4 ); if ( lazyrp >= rp+3 ) { // yes take the lazy match // put a literal : match_start++; // I had a bug where lazypos was set wrong for the hash fill // it set it to the *end* of the normal match // and for some reason that helped compression WTF WTF //SINTa lazypos = rrPtrDiff(rp - zeroPosPtr); // 233647528 // with correct lazypos : 233651228 // really this shouldn't be necessary at all // because I do an update of hash at all positions in the match including first! #if 1 // with update disabled - 233690274 hashTable16[ FAST_HASH_INDEX(lazyHash,hashCycle) ] = (U16) lazypos; #ifdef DO_FAST_2ND_HASH // do NOT step hashCycle ! hashTable16[ FAST_HASH_INDEX(lazyhash2,hashCycle) ] = (U16) lazypos; #endif #if FAST_HASH_DEPTH > 1 hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK; #endif #endif // and then drop out and do the lazy match : //matchLen = lazymatchLen; matchOff = (S32)lazymatchOff; rp = lazyrp; hashrp = lazyhashrp; } } } #endif //--------------------------------------------------- // back up start of match that we missed due to stepLiterals ! // make sure we don't read off the start of the array // this costs a little speed and gains a little compression // 15662162 at 121.58 mb/s // 15776473 at 127.92 mb/s #if 1 /* lzbf : 24,700,820 ->15,963,503 = 5.170 bpb = 1.547 to 1 encode : 0.171 seconds, 83.60 b/kc, rate= 144.54 M/s decode : 0.014 seconds, 1002.64 b/kc, rate= 1733.57 M/s */ { // 144 M/s // back up start of match that we missed // make sure we don't read off the start of the array const U8 * rpm1 = match_start-1; if ( rpm1 >= literals_start && hashrp > zeroPosPtr && rpm1[0] == hashrp[-1] ) { rpm1--; hashrp-= 2; while ( rpm1 >= literals_start && hashrp >= zeroPosPtr && rpm1[0] == *hashrp ) { rpm1--; hashrp--; } match_start = rpm1+1; //rp = RR_MAX(rp,literals_start); RR_ASSERT( match_start >= literals_start ); } } #endif S32 matchLen = rrPtrDiff32( rp - match_start ); RR_ASSERT( matchLen >= 4 ); //=============================================== // chose a match // output LRL (if any) and match S32 cur_lrl = rrPtrDiff32(match_start - literals_start); // catch expansion while writing : if_unlikely ( cp+cur_lrl >= compExpandedPtr ) { return rawLen+1; } cp = LZB_Output(cp,cur_lrl,literals_start,matchLen,matchOff); // skip the match : literals_start = rp; if ( rp >= rpEndSafe ) break; // step & update hashes : // (I already did cur pos) #ifdef DO_FAST_UPDATE_MATCH_HASHES // don't bother if it takes us to the end : // (this check is not for speed it's to avoid the access violation) const U8 * ptr = match_start+1; U16 pos16 = (U16) rrPtrDiff( ptr - zeroPosPtr ); for(;ptr 0 ); #endif if ( cur_lrl > 0 ) { // catch expansion while writing : if ( cp+cur_lrl >= compExpandedPtr ) { return rawLen+1; } cp = LZB_OutputLast(cp,cur_lrl,literals_start); } SINTa compLen = rrPtrDiff( cp - (U8 *)comp ); return compLen; } SINTa rr_lzb_simple_encode_fast(rr_lzb_simple_context * fh, const void * raw, SINTa rawLen, void * comp) { rr_lzb_simple_context_init(fh); //,raw); SINTa comp_len = rr_lzb_simple_encode_fast_sub(fh,raw,rawLen,comp); if ( comp_len >= rawLen ) { memcpy(comp,raw,rawLen); return rawLen; } return comp_len; } #undef FAST_HASH_DEPTH_SHIFT #undef DO_FAST_UPDATE_MATCH_HASHES #undef DO_FAST_LAZY_MATCH #undef DO_FAST_2ND_HASH //===================================================== #define FAST_HASH_DEPTH_SHIFT (0) #undef FAST_MULTISTEP_LITERALS_SHIFT #define FAST_MULTISTEP_LITERALS_SHIFT (4) //----------------------- // derived : RR_COMPILER_ASSERT( FAST_HASH_DEPTH_SHIFT == 0 ); #undef FAST_HASH_FUNC //#define FAST_HASH_FUNC(ptr,dword) ( LZB_Hash4(dword) & hash_table_mask ) #define FAST_HASH_FUNC(ptr,dword) ( (((dword)*2654435761U)>>16) & hash_table_mask ) // @@@@ ???? #define LZBVF_DO_BACKUP 0 //#define LZBVF_DO_BACKUP 1 static SINTa rr_lzb_simple_encode_veryfast_sub(rr_lzb_simple_context * fh, const void * raw, SINTa rawLen, void * comp) { //SIMPLEPROFILE_SCOPE_N(lzbfast_sub,rawLen); //THREADPROFILEFUNC(); U8 * cp = (U8 *)comp; U8 * compExpandedPtr = cp + rawLen - 8; const U8 * rp = (const U8 *)raw; const U8 * rpEnd = rp+rawLen; // we can match up to rpEnd // but matches can't start past rpEndSafe const U8 * rpMatchEnd = rpEnd - LZB_END_OF_BLOCK_NO_MATCH_ZONE; const U8 * rpEndSafe = rpMatchEnd - LZB_MML; if ( rpEndSafe <= (U8 *)raw ) { // can't compress return rawLen+1; } const U8 * literals_start = rp; U16 * hashTable16 = fh->m_hashTable; int hashTableSizeBits = fh->m_tableSizeBits; U32 hash_table_mask = (U32)((1UL<<(hashTableSizeBits)) - 1); const U8 * zeroPosPtr = (const U8 *)raw; // first byte is always a literal rp++; for(;;) { U32 rp32 = read32(rp); U32 hash = FAST_HASH_FUNC(rp, rp32 ); const U8 * hashrp; S32 matchOff; UINTr failedMatches; // loop while no match found : // first loop with step = 1 // @@ //int step1count = (1<= 0 ); U16 hashpos16 = hashTable16[hash]; hashTable16[ hash ] = (U16) curpos; matchOff = (U16)(curpos - hashpos16); RR_ASSERT( matchOff >= 0 && matchOff <= LZB_MAX_OFFSET ); hashrp = rp - matchOff; const U32 hashrp32 = read32(hashrp); if ( rp32 == hashrp32 && matchOff != 0 ) { goto found_match; } if ( ++rp >= rpEndSafe ) goto done; rp32 = read32(rp); hash = FAST_HASH_FUNC(rp, rp32 ); } // step starts at 2 : failedMatches = (2<= 0 ); U16 hashpos16 = hashTable16[hash]; hashTable16[ hash ] = (U16) curpos; matchOff = (U16)(curpos - hashpos16); RR_ASSERT( matchOff >= 0 && matchOff <= LZB_MAX_OFFSET ); hashrp = rp - matchOff; const U32 hashrp32 = read32(hashrp); if ( rp32 == hashrp32 && matchOff != 0 ) { goto found_match; } UINTr stepLiterals = (failedMatches>>FAST_MULTISTEP_LITERALS_SHIFT); RR_ASSERT( stepLiterals >= 1 ); ++failedMatches; rp += stepLiterals; if ( rp >= rpEndSafe ) goto done; rp32 = read32(rp); hash = FAST_HASH_FUNC(rp, rp32 ); } //------------------------------- found_match:; // found something #if LZBVF_DO_BACKUP // alternative backup using counter : S32 cur_lrl = rrPtrDiff32(rp - literals_start); int neg_max_backup = - RR_MIN(cur_lrl , rrPtrDiff32(hashrp - zeroPosPtr) ); int neg_backup = -1; if( neg_backup >= neg_max_backup && rp[neg_backup] == hashrp[neg_backup] ) { neg_backup--; while( neg_backup >= neg_max_backup && rp[neg_backup] == hashrp[neg_backup] ) { neg_backup--; } neg_backup++; rp += neg_backup; cur_lrl += neg_backup; RR_ASSERT( cur_lrl >= 0 ); RR_ASSERT( cur_lrl == rrPtrDiff32(rp - literals_start) ); } #else S32 cur_lrl = rrPtrDiff32(rp - literals_start); #endif // catch expansion while writing : if_unlikely ( cp+cur_lrl >= compExpandedPtr ) { return rawLen+1; } RR_ASSERT( matchOff >= 1 ); //--------------------------------------- // find rest of match len // save pointer to start of match // walk rp ahead to end of match const U8 * match_start = rp; rp += 4; while( rp < rpEndSafe ) { UINTr big1 = readR(rp); UINTr big2 = readR(rp-matchOff); if ( big1 == big2 ) { rp += RAD_PTRBYTES; continue; } else { rp += GetNumBytesZeroNeverAllR(big1^big2); break; } } rp = RR_MIN(rp,rpMatchEnd); S32 matchLen = rrPtrDiff32( rp - match_start ); //=============================================== // chose a match // output LRL (if any) and match cp = LZB_Output(cp,cur_lrl,literals_start,matchLen,matchOff); // skip the match : literals_start = rp; if ( rp >= rpEndSafe ) goto done; } done:; int cur_lrl = rrPtrDiff32(rpEnd - literals_start); #if LZB_END_WITH_LITERALS RR_ASSERT_ALWAYS(cur_lrl > 0 ); #endif if ( cur_lrl > 0 ) { // catch expansion while writing : if ( cp+cur_lrl >= compExpandedPtr ) { return rawLen+1; } cp = LZB_OutputLast(cp,cur_lrl,literals_start); } SINTa compLen = rrPtrDiff( cp - (U8 *)comp ); return compLen; } SINTa rr_lzb_simple_encode_veryfast(rr_lzb_simple_context * fh, const void * raw, SINTa rawLen, void * comp) { rr_lzb_simple_context_init(fh); //,raw); SINTa comp_len = rr_lzb_simple_encode_veryfast_sub(fh,raw,rawLen,comp); if ( comp_len >= rawLen ) { memcpy(comp,raw,rawLen); return rawLen; } return comp_len; } #undef FAST_HASH_DEPTH_SHIFT #undef DO_FAST_UPDATE_MATCH_HASHES #undef DO_FAST_LAZY_MATCH #undef DO_FAST_2ND_HASH //===================================================== // vim:noet:sw=4:ts=4