Files
raddebugger/src/lib_rdi/rdi_parse.c
T

2451 lines
61 KiB
C

// Copyright (c) Epic Games Tools
// Licensed under the MIT license (https://opensource.org/license/mit/)
////////////////////////////////
//~ Compression/Decompression Forward-Declares
#ifndef _RAD_LZB_SIMPLE_H_
#define _RAD_LZB_SIMPLE_H_
/*======================================================
To encode :
Set up an rr_lzb_simple_context
fill out m_tableSizeBits (14-16 is typical)
allocate m_hashTable
rr_lzb_simple_context c;
c.m_tableSizeBits = 14;
c.m_hashTable = OODLE_MALLOC_ARRAY(U16,RR_ONE_SA<<c.m_tableSizeBits);
then call _encode
NOTE :
compressed & raw size are not included in the encoded bytes. You must send
them separately.
NOTE :
lzb will never expand. comp_len is <= raw_len strictly.
if comp_len = raw_len it indicates that the compressed bytes are just a memcpy
of the raw bytes. In that case you do not need to decode.
To decode :
if comp_len is == raw_len, then the compressed bytes are just a copy of the
raw bytes and you could use them directly without calling decode.
if you call rr_lzb_simple_decode in that case, then the compressed buffer will
be memcpy'd to the raw buffer
===============================================================*/
//~ TODO(rjf): temporary glue for building this without the shared rad code:
#define __RAD64REGS__
#include <stdint.h>
typedef uint8_t U8;
typedef uint16_t U16;
typedef uint32_t U32;
typedef uint64_t U64;
typedef int8_t S8;
typedef int16_t S16;
typedef int32_t S32;
typedef int64_t S64;
typedef S64 SINTa;
typedef U64 RAD_U64;
typedef S64 RAD_S64;
typedef U32 RAD_U32;
typedef S32 RAD_S32;
#define RADINLINE __inline
#if defined(_MSC_VER)
# define RADFORCEINLINE __forceinline
#elif defined(__clang__) || defined(__GNUC__)
# define RADFORCEINLINE __attribute__((always_inline))
#else
# error need force inline for this compiler
#endif
#if _MSC_VER
# define RADLZB_TRAP() __debugbreak()
#elif __clang__ || __GNUC__
# define RADLZB_TRAP() __builtin_trap()
#else
# error Unknown trap intrinsic for this compiler.
#endif
#define RR_STRING_JOIN(arg1, arg2) RR_STRING_JOIN_DELAY(arg1, arg2)
#define RR_STRING_JOIN_DELAY(arg1, arg2) RR_STRING_JOIN_IMMEDIATE(arg1, arg2)
#define RR_STRING_JOIN_IMMEDIATE(arg1, arg2) arg1 ## arg2
#ifdef _MSC_VER
#define RR_NUMBERNAME(name) RR_STRING_JOIN(name,__COUNTER__)
#else
#define RR_NUMBERNAME(name) RR_STRING_JOIN(name,__LINE__)
#endif
#define RR_COMPILER_ASSERT(exp) typedef char RR_NUMBERNAME(_dummy_array) [ (exp) ? 1 : -1 ]
#if defined(__clang__)
# define Expect(expr, val) __builtin_expect((expr), (val))
#else
# define Expect(expr, val) (expr)
#endif
#define RAD_LIKELY(expr) Expect(expr,1)
#define RAD_UNLIKELY(expr) Expect(expr,0)
#define __RADLITTLEENDIAN__ 1
#define RAD_PTRBYTES 8
#define RR_MIN(a,b) ( (a) < (b) ? (a) : (b) )
#define RR_MAX(a,b) ( (a) > (b) ? (a) : (b) )
#define RR_ASSERT_ALWAYS(c) do{if(!(c)) {RADLZB_TRAP();}}while(0)
#define RR_ASSERT(c) RR_ASSERT_ALWAYS(c)
#define RR_PUT16_LE(ptr,val) *((U16 *)(ptr)) = (U16)(val)
#define RR_GET16_LE_UNALIGNED(ptr) *((const U16 *)(ptr))
static RADINLINE U32
rrCtzBytes32(U32 val)
{
// Don't get fancy here. Assumes val != 0.
if (val & 0x000000ffu) return 0;
if (val & 0x0000ff00u) return 1;
if (val & 0x00ff0000u) return 2;
return 3;
}
static RADINLINE U32
rrCtzBytes64(U64 val)
{
U32 lo = (U32) val;
return lo ? rrCtzBytes32(lo) : 4 + rrCtzBytes32((U32) (val >> 32));
}
//~
//---------------------
typedef struct rr_lzb_simple_context rr_lzb_simple_context;
struct rr_lzb_simple_context
{
U16 * m_hashTable; // must be allocated to sizeof(U16)*(1<<m_tableSizeBits)
S32 m_tableSizeBits;
};
SINTa rr_lzb_simple_encode_fast(rr_lzb_simple_context * ctx,
const void * raw, SINTa rawLen, void * comp);
SINTa rr_lzb_simple_encode_veryfast(rr_lzb_simple_context * ctx,
const void * raw, SINTa rawLen, void * comp);
//---------------------
// rr_lzb_simple_decode returns the number of compressed bytes consumed ( == compLen)
SINTa rr_lzb_simple_decode(const void * comp, SINTa compLen, void * raw, SINTa rawLen);
//---------------------
#endif // _RAD_LZB_SIMPLE_H_
////////////////////////////////
//~ Top-Level Parsing API
RDI_PROC RDI_ParseStatus
rdi_parse(RDI_U8 *data, RDI_U64 size, RDI_Parsed *out)
{
RDI_ParseStatus result = RDI_ParseStatus_Good;
//////////////////////////////
//- rjf: extract header
//
RDI_Header *hdr = 0;
if(result == RDI_ParseStatus_Good)
{
if(sizeof(*hdr) <= size)
{
hdr = (RDI_Header*)data;
}
if(hdr == 0 || hdr->magic != RDI_MAGIC_CONSTANT)
{
hdr = 0;
result = RDI_ParseStatus_HeaderDoesNotMatch;
}
if(hdr != 0 && hdr->encoding_version != RDI_ENCODING_VERSION)
{
hdr = 0;
result = RDI_ParseStatus_UnsupportedVersionNumber;
}
}
//////////////////////////////
//- rjf: extract data sections
//
RDI_Section *dsecs = 0;
RDI_U32 dsec_count = 0;
if(result == RDI_ParseStatus_Good)
{
RDI_U64 opl = (RDI_U64)hdr->data_section_off + (RDI_U64)hdr->data_section_count*sizeof(*dsecs);
if(opl <= size)
{
dsecs = (RDI_Section*)(data + hdr->data_section_off);
dsec_count = hdr->data_section_count;
}
if(dsecs == 0)
{
result = RDI_ParseStatus_InvalidDataSecionLayout;
}
}
//////////////////////////////
//- rjf: fill result
//
if(result == RDI_ParseStatus_Good)
{
out->raw_data = data;
out->raw_data_size = size;
out->sections = dsecs;
out->sections_count = dsec_count;
}
return result;
}
////////////////////////////////
//~ Base Parsed Info Extraction Helpers
//- section table/element raw data extraction
RDI_PROC void *
rdi_section_raw_data_from_kind(RDI_Parsed *rdi, RDI_SectionKind kind, RDI_SectionEncoding *encoding_out, RDI_U64 *size_out)
{
void *result = 0;
#if !defined(RDI_DISABLE_NILS)
result = &rdi_nil_element_union;
*size_out = rdi_section_element_size_table[kind];
#endif
if(0 <= kind && kind < rdi->sections_count &&
rdi->sections[kind].off < rdi->raw_data_size)
{
result = rdi->raw_data+rdi->sections[kind].off;
*size_out = rdi->sections[kind].encoded_size;
*encoding_out = rdi->sections[kind].encoding;
}
return result;
}
RDI_PROC void *
rdi_section_raw_table_from_kind(RDI_Parsed *rdi, RDI_SectionKind kind, RDI_U64 *count_out)
{
void *result = 0;
RDI_U64 all_elements_size = 0;
RDI_SectionEncoding all_elements_encoding = 0;
void *all_elements = rdi_section_raw_data_from_kind(rdi, kind, &all_elements_encoding, &all_elements_size);
if(all_elements_encoding == RDI_SectionEncoding_Unpacked)
{
RDI_U64 element_size = (RDI_U64)rdi_section_element_size_table[kind];
RDI_U64 all_elements_count = all_elements_size/element_size;
result = all_elements;
*count_out = all_elements_count;
}
return result;
}
RDI_PROC void *
rdi_section_raw_element_from_kind_idx(RDI_Parsed *rdi, RDI_SectionKind kind, RDI_U64 idx)
{
RDI_U64 count = 0;
void *table = rdi_section_raw_table_from_kind(rdi, kind, &count);
void *result = table;
if(idx < count)
{
RDI_U64 element_size = (RDI_U64)rdi_section_element_size_table[kind];
result = (RDI_U8 *)table + element_size*idx;
}
return result;
}
//- info about whole parse
RDI_PROC RDI_U64
rdi_decompressed_size_from_parsed(RDI_Parsed *rdi)
{
RDI_U64 decompressed_size = rdi->raw_data_size;
for(RDI_U64 section_idx = 0; section_idx < rdi->sections_count; section_idx += 1)
{
decompressed_size += (rdi->sections[section_idx].unpacked_size - rdi->sections[section_idx].encoded_size);
}
return decompressed_size;
}
//- decompression
internal void
rdi_decompress_parsed(U8 *decompressed_data, U64 decompressed_size, RDI_Parsed *og_rdi)
{
// rjf: copy header
RDI_Header *src_header = (RDI_Header *)og_rdi->raw_data;
RDI_Header *dst_header = (RDI_Header *)decompressed_data;
{
MemoryCopy(dst_header, src_header, sizeof(RDI_Header));
}
// rjf: copy & adjust sections for decompressed version
if(og_rdi->sections_count != 0)
{
RDI_Section *dsec_base = (RDI_Section *)(decompressed_data + dst_header->data_section_off);
MemoryCopy(dsec_base, (U8 *)og_rdi->raw_data + src_header->data_section_off, sizeof(RDI_Section) * og_rdi->sections_count);
U64 off = dst_header->data_section_off + sizeof(RDI_Section) * og_rdi->sections_count;
off += 7;
off -= off%8;
for(U64 idx = 0; idx < og_rdi->sections_count; idx += 1)
{
dsec_base[idx].encoding = RDI_SectionEncoding_Unpacked;
dsec_base[idx].off = off;
dsec_base[idx].encoded_size = dsec_base[idx].unpacked_size;
off += dsec_base[idx].unpacked_size;
off += 7;
off -= off%8;
}
}
// rjf: decompress sections into new decompressed file buffer
if(og_rdi->sections_count != 0)
{
RDI_Section *src_first = og_rdi->sections;
RDI_Section *dst_first = (RDI_Section *)(decompressed_data + dst_header->data_section_off);
RDI_Section *src_opl = src_first + og_rdi->sections_count;
RDI_Section *dst_opl = dst_first + og_rdi->sections_count;
for(RDI_Section *src = src_first, *dst = dst_first;
src < src_opl && dst < dst_opl;
src += 1, dst += 1)
{
rr_lzb_simple_decode((U8*)og_rdi->raw_data + src->off, src->encoded_size,
decompressed_data + dst->off, dst->unpacked_size);
}
}
}
//- strings
RDI_PROC RDI_U8 *
rdi_string_from_idx(RDI_Parsed *rdi, RDI_U32 idx, RDI_U64 *len_out)
{
RDI_U8 *result_base = 0;
RDI_U64 result_size = 0;
{
RDI_U64 string_offs_count = 0;
RDI_U32 *string_offs = rdi_table_from_name(rdi, StringTable, &string_offs_count);
if(idx < string_offs_count)
{
RDI_U64 string_data_size = 0;
RDI_U8 *string_data = rdi_table_from_name(rdi, StringData, &string_data_size);
RDI_U32 off_raw = string_offs[idx];
RDI_U32 opl_raw = string_offs[idx + 1];
RDI_U32 opl = rdi_parse__min(opl_raw, string_data_size);
RDI_U32 off = rdi_parse__min(off_raw, opl);
result_base = string_data + off;
result_size = opl - off;
}
}
*len_out = result_size;
return result_base;
}
//- index runs
RDI_PROC RDI_U32*
rdi_idx_run_from_first_count(RDI_Parsed *rdi, RDI_U32 raw_first, RDI_U32 raw_count, RDI_U32 *n_out)
{
RDI_U64 idx_run_count = 0;
RDI_U32 *idx_run_data = rdi_table_from_name(rdi, IndexRuns, &idx_run_count);
RDI_U32 raw_opl = raw_first + raw_count;
RDI_U32 opl = rdi_parse__min(raw_opl, idx_run_count);
RDI_U32 first = rdi_parse__min(raw_first, opl);
RDI_U32 *result = 0;
if(first < idx_run_count)
{
result = idx_run_data + first;
}
*n_out = opl - first;
return result;
}
//- line info
RDI_PROC void
rdi_parsed_from_line_table(RDI_Parsed *rdi, RDI_LineTable *line_table, RDI_ParsedLineTable *out)
{
//- rjf: extract top-level line info tables
RDI_U64 all_voffs_count = 0;
RDI_U64 *all_voffs = rdi_table_from_name(rdi, LineInfoVOffs, &all_voffs_count);
RDI_U64 *all_voffs_opl = all_voffs + all_voffs_count;
RDI_U64 all_lines_count = 0;
RDI_Line *all_lines = rdi_table_from_name(rdi, LineInfoLines, &all_lines_count);
RDI_Line *all_lines_opl = all_lines + all_lines_count;
RDI_U64 all_cols_count = 0;
RDI_Column *all_cols = rdi_table_from_name(rdi, LineInfoColumns, &all_cols_count);
RDI_Column *all_cols_opl = all_cols + all_cols_count;
//- rjf: extract ranges of top-level tables belonging to this line table
RDI_U64 *lt_voffs = all_voffs + line_table->voffs_base_idx;
RDI_Line *lt_lines = all_lines + line_table->lines_base_idx;
RDI_Column *lt_cols = all_cols + line_table->cols_base_idx;
RDI_U64 lines_count = line_table->lines_count;
RDI_U64 cols_count = line_table->cols_count;
if(lt_voffs >= all_voffs_opl) {lt_voffs = all_voffs; lines_count = 0;}
if(lt_lines >= all_lines_opl) {lt_lines = all_lines; lines_count = 0;}
if(lt_cols >= all_cols_opl) {lt_cols = all_cols; cols_count = 0;}
//- rjf: fill result
out->voffs = lt_voffs;
out->lines = lt_lines;
out->cols = lt_cols;
out->count = lines_count;
out->col_count = cols_count;
}
RDI_PROC RDI_U64
rdi_line_info_idx_range_from_voff(RDI_ParsedLineTable *line_info, RDI_U64 voff, RDI_U64 *n_out)
{
RDI_U64 result = 0;
RDI_U64 n = 0;
if(line_info->count > 0 && line_info->voffs[0] <= voff && voff < line_info->voffs[line_info->count - 1])
{
//- rjf: find i such that: (vmap[i].voff <= voff) && (voff < vmap[i + 1].voff)
// assuming: (i < j) -> (vmap[i].voff < vmap[j].voff)
RDI_U32 first = 0;
RDI_U32 opl = line_info->count;
for(;;)
{
RDI_U32 mid = (first + opl)/2;
if(line_info->voffs[mid] < voff)
{
first = mid;
}
else if(line_info->voffs[mid] > voff)
{
opl = mid;
}
else
{
first = mid;
break;
}
if(opl - first <= 1)
{
break;
}
}
result = (RDI_U64)first;
//- rjf: scan leftward, to find shallowest line info matching this voff
for(;result != 0;)
{
if(line_info->voffs[result-1] == voff)
{
result -= 1;
}
else
{
break;
}
}
//- rjf: scan rightward, to count # of line info with this voff
for(RDI_U64 idx = result; idx < line_info->count; idx += 1)
{
if(line_info->voffs[idx] == voff)
{
n += 1;
}
else
{
break;
}
}
}
if(n_out)
{
*n_out = n;
}
return result;
}
RDI_PROC RDI_U64
rdi_line_info_idx_from_voff(RDI_ParsedLineTable *line_info, RDI_U64 voff)
{
RDI_U64 count = 0;
RDI_U64 result = rdi_line_info_idx_range_from_voff(line_info, voff, &count);
for(RDI_S64 idx = count-1; idx >= 0; idx -= 1)
{
if(result + idx < line_info->count && line_info->lines[result+idx].file_idx != 0)
{
result += idx;
break;
}
}
return result;
}
RDI_PROC void
rdi_parsed_from_source_line_map(RDI_Parsed *rdi, RDI_SourceLineMap *map, RDI_ParsedSourceLineMap *out)
{
//- rjf: extract top-level line info tables
RDI_U64 all_nums_count = 0;
RDI_U32 *all_nums = rdi_table_from_name(rdi, SourceLineMapNumbers, &all_nums_count);
RDI_U32 *all_nums_opl = all_nums + all_nums_count;
RDI_U64 all_rngs_count = 0;
RDI_U32 *all_rngs = rdi_table_from_name(rdi, SourceLineMapRanges, &all_rngs_count);
RDI_U32 *all_rngs_opl = all_rngs + all_rngs_count;
RDI_U64 all_voffs_count = 0;
RDI_U64 *all_voffs = rdi_table_from_name(rdi, SourceLineMapVOffs, &all_voffs_count);
RDI_U64 *all_voffs_opl = all_voffs + all_voffs_count;
//- rjf: extract ranges of top-level tables belonging to this line map
RDI_U32 *map_nums = all_nums + map->line_map_nums_base_idx;
RDI_U32 *map_rngs = all_rngs + map->line_map_range_base_idx;
RDI_U64 *map_voffs= all_voffs+ map->line_map_voff_base_idx;
RDI_U64 lines_count = (RDI_U64)map->line_count;
RDI_U64 voffs_count = (RDI_U64)map->voff_count;
if(map_nums >= all_nums_opl) {map_nums = all_nums; lines_count = 0;}
if(map_rngs >= all_rngs_opl) {map_rngs = all_rngs; lines_count = 0;}
if(map_voffs>= all_voffs_opl){map_voffs= all_voffs;voffs_count = 0;}
//- rjf: fill result
out->nums = map_nums;
out->ranges = map_rngs;
out->voffs = map_voffs;
out->count = lines_count;
out->voff_count = voffs_count;
}
RDI_PROC RDI_U64 *
rdi_line_voffs_from_num(RDI_ParsedSourceLineMap *map, RDI_U32 linenum, RDI_U32 *n_out)
{
RDI_U64 *result = 0;
*n_out = 0;
RDI_U32 closest_i = 0;
if(map->count > 0 && map->nums[0] <= linenum)
{
// assuming: (i < j) -> (nums[i] < nums[j])
// find i such that: (nums[i] <= linenum) && (linenum < nums[i + 1])
RDI_U32 *nums = map->nums;
RDI_U32 first = 0;
RDI_U32 opl = map->count;
for(;;)
{
RDI_U32 mid = (first + opl)/2;
if(nums[mid] < linenum)
{
first = mid;
}
else if(nums[mid] > linenum)
{
opl = mid;
}
else
{
first = mid;
break;
}
if(opl - first <= 1)
{
break;
}
}
closest_i = first;
}
// round up instead of down if possible
if(closest_i + 1 < map->count && map->nums[closest_i] < linenum)
{
closest_i += 1;
}
// set result if possible
if(closest_i < map->count)
{
RDI_U32 first = map->ranges[closest_i];
RDI_U32 opl = map->ranges[closest_i + 1];
if(opl <= map->voff_count)
{
result = map->voffs + first;
*n_out = opl - first;
}
}
return result;
}
//- vmap lookups
RDI_PROC RDI_U64
rdi_vmap_idx_from_voff(RDI_VMapEntry *vmap, RDI_U64 vmap_count, RDI_U64 voff)
{
RDI_U64 result = 0;
if(vmap_count > 0 && vmap[0].voff <= voff && voff < vmap[vmap_count - 1].voff)
{
// assuming: (i < j) -> (vmap[i].voff < vmap[j].voff)
// find i such that: (vmap[i].voff <= voff) && (voff < vmap[i + 1].voff)
RDI_U32 first = 0;
RDI_U32 opl = vmap_count;
for(;;)
{
RDI_U32 mid = (first + opl)/2;
if(vmap[mid].voff < voff)
{
first = mid;
}
else if(vmap[mid].voff > voff)
{
opl = mid;
}
else
{
first = mid;
break;
}
if(opl - first <= 1)
{
break;
}
}
result = (RDI_U64)vmap[first].idx;
}
return result;
}
RDI_PROC RDI_U64
rdi_vmap_idx_from_section_kind_voff(RDI_Parsed *rdi, RDI_SectionKind kind, RDI_U64 voff)
{
RDI_U64 vmaps_count = 0;
RDI_VMapEntry *vmaps = rdi_section_raw_table_from_kind(rdi, kind, &vmaps_count);
RDI_U64 result = rdi_vmap_idx_from_voff(vmaps, vmaps_count, voff);
return result;
}
//- name maps
RDI_PROC void
rdi_parsed_from_name_map(RDI_Parsed *rdi, RDI_NameMap *mapptr, RDI_ParsedNameMap *out)
{
out->buckets = 0;
out->bucket_count = 0;
if(mapptr != 0)
{
RDI_U64 all_buckets_count = 0;
RDI_NameMapBucket *all_buckets = rdi_table_from_name(rdi, NameMapBuckets, &all_buckets_count);
RDI_U64 all_nodes_count = 0;
RDI_NameMapNode *all_nodes = rdi_table_from_name(rdi, NameMapNodes, &all_nodes_count);
out->buckets = all_buckets+mapptr->bucket_base_idx;
out->nodes = all_nodes+mapptr->node_base_idx;
out->bucket_count = mapptr->bucket_count;
out->node_count = mapptr->node_count;
if(mapptr->bucket_base_idx > all_buckets_count)
{
out->buckets = 0;
out->bucket_count = 0;
}
if(mapptr->node_base_idx > all_nodes_count)
{
out->nodes = 0;
out->node_count = 0;
}
}
}
RDI_PROC RDI_NameMapNode*
rdi_name_map_lookup(RDI_Parsed *p, RDI_ParsedNameMap *map, RDI_U8 *str, RDI_U64 len)
{
RDI_NameMapNode *result = 0;
if(map->bucket_count > 0)
{
RDI_NameMapBucket *buckets = map->buckets;
RDI_U64 bucket_count = map->bucket_count;
RDI_U64 hash = rdi_hash(str, len);
RDI_U64 bucket_index = hash%bucket_count;
RDI_NameMapBucket *bucket = map->buckets + bucket_index;
RDI_NameMapNode *node = map->nodes + bucket->first_node;
RDI_NameMapNode *node_opl = node + bucket->node_count;
for(;node < node_opl; node += 1)
{
// extract a string from this node
RDI_U64 nlen = 0;
RDI_U8 *nstr = rdi_string_from_idx(p, node->string_idx, &nlen);
// compare this to the needle string
RDI_S32 match = 0;
if(nlen == len)
{
RDI_U8 *a = str;
RDI_U8 *aopl = str + len;
RDI_U8 *b = nstr;
for (;a < aopl && *a == *b; a += 1, b += 1);
match = (a == aopl);
}
// stop with a matching node in result
if(match)
{
result = node;
break;
}
}
}
return result;
}
RDI_PROC RDI_U32*
rdi_matches_from_map_node(RDI_Parsed *p, RDI_NameMapNode *node, RDI_U32 *n_out)
{
RDI_U32 *result = 0;
*n_out = 0;
if(node != 0)
{
if(node->match_count == 1)
{
result = &node->match_idx_or_idx_run_first;
*n_out = 1;
}
else
{
result = rdi_idx_run_from_first_count(p, node->match_idx_or_idx_run_first, node->match_count, n_out);
}
}
return result;
}
////////////////////////////////
//~ High-Level Composite Lookup Functions
//- procedures
RDI_PROC RDI_Procedure *
rdi_procedure_from_name(RDI_Parsed *rdi, RDI_U8 *name, RDI_U64 name_size)
{
RDI_NameMap *map = rdi_element_from_name_idx(rdi, NameMaps, RDI_NameMapKind_Procedures);
RDI_ParsedNameMap map_parsed = {0};
rdi_parsed_from_name_map(rdi, map, &map_parsed);
RDI_NameMapNode *node = rdi_name_map_lookup(rdi, &map_parsed, name, name_size);
RDI_U32 id_count = 0;
RDI_U32 *ids = rdi_matches_from_map_node(rdi, node, &id_count);
RDI_U32 procedure_idx = 0;
if(id_count > 0)
{
procedure_idx = ids[0];
}
RDI_Procedure *procedure = rdi_element_from_name_idx(rdi, Procedures, procedure_idx);
return procedure;
}
RDI_PROC RDI_Procedure *
rdi_procedure_from_name_cstr(RDI_Parsed *rdi, char *cstr)
{
RDI_Procedure *result = rdi_procedure_from_name(rdi, (RDI_U8 *)cstr, rdi_cstring_length(cstr));
return result;
}
RDI_PROC RDI_U8 *
rdi_name_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure, RDI_U64 *len_out)
{
return rdi_string_from_idx(rdi, procedure->name_string_idx, len_out);
}
RDI_PROC RDI_Scope *
rdi_root_scope_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure)
{
RDI_Scope *scope = rdi_element_from_name_idx(rdi, Scopes, procedure->root_scope_idx);
return scope;
}
RDI_PROC RDI_UDT *
rdi_container_udt_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure)
{
RDI_U64 idx = 0;
if(procedure->link_flags & RDI_LinkFlag_TypeScoped)
{
idx = procedure->container_idx;
}
RDI_UDT *udt = rdi_element_from_name_idx(rdi, UDTs, idx);
return udt;
}
RDI_PROC RDI_Procedure *
rdi_container_procedure_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure)
{
RDI_U64 idx = 0;
if(procedure->link_flags & RDI_LinkFlag_ProcScoped)
{
idx = procedure->container_idx;
}
RDI_Procedure *container_procedure = rdi_element_from_name_idx(rdi, Procedures, idx);
return container_procedure;
}
RDI_PROC RDI_U64
rdi_first_voff_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure)
{
RDI_Scope *scope = rdi_root_scope_from_procedure(rdi, procedure);
RDI_U64 result = rdi_first_voff_from_scope(rdi, scope);
return result;
}
RDI_PROC RDI_U64
rdi_opl_voff_from_procedure(RDI_Parsed *rdi, RDI_Procedure *procedure)
{
RDI_Scope *scope = rdi_root_scope_from_procedure(rdi, procedure);
RDI_U64 result = rdi_opl_voff_from_scope(rdi, scope);
return result;
}
RDI_PROC RDI_Procedure *
rdi_procedure_from_voff(RDI_Parsed *rdi, RDI_U64 voff)
{
RDI_Scope *scope = rdi_scope_from_voff(rdi, voff);
RDI_Procedure *procedure = rdi_procedure_from_scope(rdi, scope);
return procedure;
}
//- scopes
RDI_PROC RDI_U64
rdi_first_voff_from_scope(RDI_Parsed *rdi, RDI_Scope *scope)
{
RDI_U64 *voffs = rdi_element_from_name_idx(rdi, ScopeVOffData, scope->voff_range_first);
RDI_U64 result = *voffs;
return result;
}
RDI_PROC RDI_U64
rdi_opl_voff_from_scope(RDI_Parsed *rdi, RDI_Scope *scope)
{
RDI_U64 result = 0;
if(scope->voff_range_opl != 0)
{
RDI_U64 *voffs = rdi_element_from_name_idx(rdi, ScopeVOffData, scope->voff_range_opl-1);
result = *voffs;
}
return result;
}
RDI_PROC RDI_Scope *
rdi_scope_from_voff(RDI_Parsed *rdi, RDI_U64 voff)
{
RDI_U32 idx = rdi_vmap_idx_from_section_kind_voff(rdi, RDI_SectionKind_ScopeVMap, voff);
RDI_Scope *scope = rdi_element_from_name_idx(rdi, Scopes, idx);
return scope;
}
RDI_PROC RDI_Scope *
rdi_parent_from_scope(RDI_Parsed *rdi, RDI_Scope *scope)
{
RDI_Scope *parent = rdi_element_from_name_idx(rdi, Scopes, scope->parent_scope_idx);
return parent;
}
RDI_PROC RDI_Procedure *
rdi_procedure_from_scope(RDI_Parsed *rdi, RDI_Scope *scope)
{
RDI_Procedure *procedure = rdi_element_from_name_idx(rdi, Procedures, scope->proc_idx);
return procedure;
}
RDI_PROC RDI_InlineSite *
rdi_inline_site_from_scope(RDI_Parsed *rdi, RDI_Scope *scope)
{
RDI_InlineSite *inline_site = rdi_element_from_name_idx(rdi, InlineSites, scope->inline_site_idx);
return inline_site;
}
//- global variables
RDI_PROC RDI_GlobalVariable *
rdi_global_variable_from_voff(RDI_Parsed *rdi, RDI_U64 voff)
{
RDI_U32 idx = rdi_vmap_idx_from_section_kind_voff(rdi, RDI_SectionKind_GlobalVMap, voff);
RDI_GlobalVariable *gvar = rdi_element_from_name_idx(rdi, GlobalVariables, idx);
return gvar;
}
//- units
RDI_PROC RDI_Unit *
rdi_unit_from_voff(RDI_Parsed *rdi, RDI_U64 voff)
{
RDI_U32 unit_idx = rdi_vmap_idx_from_section_kind_voff(rdi, RDI_SectionKind_UnitVMap, voff);
RDI_Unit *unit = rdi_element_from_name_idx(rdi, Units, unit_idx);
return unit;
}
RDI_PROC RDI_LineTable *
rdi_line_table_from_unit(RDI_Parsed *rdi, RDI_Unit *unit)
{
RDI_LineTable *line_table = rdi_element_from_name_idx(rdi, LineTables, unit->line_table_idx);
return line_table;
}
//- line info
RDI_PROC RDI_Line
rdi_line_from_voff(RDI_Parsed *rdi, RDI_U64 voff)
{
RDI_Unit *unit = rdi_unit_from_voff(rdi, voff);
RDI_LineTable *line_table = rdi_line_table_from_unit(rdi, unit);
RDI_Line line = rdi_line_from_line_table_voff(rdi, line_table, voff);
return line;
}
RDI_PROC RDI_Line
rdi_line_from_line_table_voff(RDI_Parsed *rdi, RDI_LineTable *line_table, RDI_U64 voff)
{
RDI_ParsedLineTable parsed = {0};
rdi_parsed_from_line_table(rdi, line_table, &parsed);
RDI_U64 line_info_idx = rdi_line_info_idx_from_voff(&parsed, voff);
RDI_Line result = {0};
if(line_info_idx < parsed.count)
{
result = parsed.lines[line_info_idx];
}
return result;
}
RDI_PROC RDI_SourceFile *
rdi_source_file_from_line(RDI_Parsed *rdi, RDI_Line *line)
{
RDI_SourceFile *result = rdi_element_from_name_idx(rdi, SourceFiles, line->file_idx);
return result;
}
//- source files
RDI_PROC RDI_SourceFile *
rdi_source_file_from_normal_path(RDI_Parsed *rdi, RDI_U8 *name, RDI_U64 name_size)
{
RDI_NameMap *map = rdi_element_from_name_idx(rdi, NameMaps, RDI_NameMapKind_NormalSourcePaths);
RDI_ParsedNameMap map_parsed = {0};
rdi_parsed_from_name_map(rdi, map, &map_parsed);
RDI_NameMapNode *node = rdi_name_map_lookup(rdi, &map_parsed, name, name_size);
RDI_U32 id_count = 0;
RDI_U32 *ids = rdi_matches_from_map_node(rdi, node, &id_count);
RDI_U32 file_idx = 0;
if(id_count > 0)
{
file_idx = ids[0];
}
RDI_SourceFile *file = rdi_element_from_name_idx(rdi, SourceFiles, file_idx);
return file;
}
RDI_PROC RDI_SourceFile *
rdi_source_file_from_normal_path_cstr(RDI_Parsed *rdi, char *cstr)
{
RDI_SourceFile *result = rdi_source_file_from_normal_path(rdi, (RDI_U8 *)cstr, rdi_cstring_length(cstr));
return result;
}
RDI_PROC RDI_U8 *
rdi_normal_path_from_source_file(RDI_Parsed *rdi, RDI_SourceFile *src_file, RDI_U64 *len_out)
{
return rdi_string_from_idx(rdi, src_file->normal_full_path_string_idx, len_out);
}
RDI_PROC RDI_FilePathNode *
rdi_file_path_node_from_source_file(RDI_Parsed *rdi, RDI_SourceFile *src_file)
{
RDI_FilePathNode *result = rdi_element_from_name_idx(rdi, FilePathNodes, src_file->file_path_node_idx);
return result;
}
RDI_PROC RDI_SourceLineMap *
rdi_source_line_map_from_source_file(RDI_Parsed *rdi, RDI_SourceFile *src_file)
{
RDI_SourceLineMap *result = rdi_element_from_name_idx(rdi, SourceLineMaps, src_file->source_line_map_idx);
return result;
}
RDI_PROC RDI_U64
rdi_first_voff_from_source_file_line_num(RDI_Parsed *rdi, RDI_SourceFile *src_file, RDI_U32 line_num)
{
RDI_SourceLineMap *source_line_map = rdi_source_line_map_from_source_file(rdi, src_file);
RDI_U64 voff = rdi_first_voff_from_source_line_map_num(rdi, source_line_map, line_num);
return voff;
}
//- source line maps
RDI_PROC RDI_U64
rdi_first_voff_from_source_line_map_num(RDI_Parsed *rdi, RDI_SourceLineMap *map, RDI_U32 line_num)
{
RDI_ParsedSourceLineMap parsed = {0};
rdi_parsed_from_source_line_map(rdi, map, &parsed);
RDI_U32 all_voffs_count = 0;
RDI_U64 *all_voffs = rdi_line_voffs_from_num(&parsed, line_num, &all_voffs_count);
RDI_U64 voff = 0;
if(all_voffs_count != 0)
{
voff = all_voffs[0];
}
return voff;
}
//- file path nodes
RDI_PROC RDI_FilePathNode *
rdi_parent_from_file_path_node(RDI_Parsed *rdi, RDI_FilePathNode *node)
{
RDI_FilePathNode *result = rdi_element_from_name_idx(rdi, FilePathNodes, node->parent_path_node);
return result;
}
RDI_PROC RDI_U8 *
rdi_name_from_file_path_node(RDI_Parsed *rdi, RDI_FilePathNode *node, RDI_U64 *len_out)
{
return rdi_string_from_idx(rdi, node->name_string_idx, len_out);
}
////////////////////////////////
//~ Parser Helpers
RDI_PROC RDI_U64
rdi_cstring_length(char *cstr)
{
RDI_U64 result = 0;
for(;cstr[result] != 0; result += 1){}
return result;
}
RDI_PROC RDI_U64
rdi_size_from_bytecode_stream(RDI_U8 *ptr, RDI_U8 *opl)
{
RDI_U64 bytecode_size = 0;
RDI_U8 *off_first = ptr + sizeof(RDI_LocationKind);
for(RDI_U8 *off = off_first, *next_off = opl; off < opl; off = next_off)
{
RDI_U8 op = *off;
if(op == 0)
{
break;
}
RDI_U16 ctrlbits = rdi_eval_op_ctrlbits_table[op];
RDI_U32 p_size = RDI_DECODEN_FROM_CTRLBITS(ctrlbits);
bytecode_size += (1 + p_size);
next_off = (off + 1 + p_size);
}
return bytecode_size;
}
////////////////////////////////
//~ Compression/Decompression Implementation
#include <string.h>
//-------------------------------------------------
// UINTr = int the size of a register
#ifdef __RAD64REGS__
#define RAD_UINTr RAD_U64
#define RAD_SINTr RAD_S64
#define readR read64
#define writeR write64
#define rrClzBytesR rrClzBytes64
#define rrCtzBytesR rrCtzBytes64
#else
#define RAD_UINTr RAD_U32
#define RAD_SINTr RAD_S32
#define readR read32
#define writeR write32
#define rrClzBytesR rrClzBytes32
#define rrCtzBytesR rrCtzBytes32
#endif
typedef RAD_SINTr SINTr;
typedef RAD_UINTr UINTr;
#define OOINLINE RADFORCEINLINE
#define if_unlikely(exp) if ( RAD_UNLIKELY( exp ) )
#define if_likely( exp) if ( RAD_LIKELY( exp ) )
// Raw byte IO
#if defined(__RADARM__) && !defined(__RAD64__) && defined(__GNUC__)
// older GCCs don't turn the memcpy variant into loads/stores, but
// they do support this:
typedef union
{
U16 u16;
U32 u32;
U64 u64;
} __attribute__((packed)) unaligned_type;
static inline U16 read16(const void *ptr) { return ((const unaligned_type *)ptr)->u16; }
static inline void write16(void *ptr, U16 x) { ((unaligned_type *)ptr)->u16 = x; }
static inline U32 read32(const void *ptr) { return ((const unaligned_type *)ptr)->u32; }
static inline void write32(void *ptr, U32 x) { ((unaligned_type *)ptr)->u32 = x; }
static inline U64 read64(const void *ptr) { return ((const unaligned_type *)ptr)->u64; }
static inline void write64(void *ptr, U64 x) { ((unaligned_type *)ptr)->u64 = x; }
#else
// most C compilers we target are smart enough to turn this into single loads/stores
static inline U16 read16(const void *ptr) { U16 x; memcpy(&x, ptr, sizeof(x)); return x; }
static inline void write16(void *ptr, U16 x) { memcpy(ptr, &x, sizeof(x)); }
static inline U32 read32(const void *ptr) { U32 x; memcpy(&x, ptr, sizeof(x)); return x; }
static inline void write32(void *ptr, U32 x) { memcpy(ptr, &x, sizeof(x)); }
static inline U64 read64(const void *ptr) { U64 x; memcpy(&x, ptr, sizeof(x)); return x; }
static inline void write64(void *ptr, U64 x) { memcpy(ptr, &x, sizeof(x)); }
#endif
#define RR_PUT16_LE_UNALIGNED(ptr,val) RR_PUT16_LE(ptr,val)
#define RR_PUT16_LE_UNALIGNED_OFFSET(ptr,val,offset) RR_PUT16_LE_OFFSET(ptr,val,offset)
//===========================================================================
static RADINLINE SINTa rrPtrDiffV(void * end, void *start) { return (SINTa)( ((char *)(end)) - ((char *)(start)) ); }
// helper function to show I really am intending to put a pointer difference in an int :
static RADINLINE SINTa rrPtrDiff(SINTa val) { return val; }
static RADINLINE S32 rrPtrDiff32(SINTa val) { S32 ret = (S32) val; RR_ASSERT( (SINTa)ret == val ); return ret; }
static RADINLINE SINTr rrPtrDiffR(SINTa val) { SINTr ret = (SINTr) val; RR_ASSERT( (SINTa)ret == val ); return ret; }
//=================================================================
#define LZB_LRL_BITS 4
#define LZB_LRL_ESCAPE 15
#define LZB_ML_BITS 4
#define LZB_MLCONTROL_ESCAPE 15
#define LZB_SLIDING_WINDOW_POW2 16
#define LZB_SLIDING_WINDOW_SIZE (1<<LZB_SLIDING_WINDOW_POW2)
#define LZB_MAX_OFFSET 0xFFFF
#define LZB_MML 4 // should be 3 if I had LO
#define LZB_MATCHLEN_ESCAPE (LZB_MLCONTROL_ESCAPE+4)
#define LZB_END_WITH_LITERALS 1 // @@ ??
//#define LZB_END_WITH_LITERALS 0 // @@ ??
#define LZB_END_OF_BLOCK_NO_MATCH_ZONE 8
/**
NOTE ABOUT LZB_END_OF_BLOCK_NO_MATCH_ZONE
The limitation in LZB does not actually come from the 8-at-a-time match copier
it comes from the unconditional 8-byte LRL copy
that means the last 8 bytes of every block *must* be literals
(note that's *block* not quantum)
The constraint due to matches is actually weaker
(match len rounded up to next multiple of 8 must not go past block end)
**/
// decode speed on lzt99 :
// LZ4 : 1715.10235
#define LZB_FORCELASTLRL9 1
//=======================================
#define lz_copywordstep(d,s) do { writeR(d, readR(s)); (s) += sizeof(UINTr); (d) += sizeof(UINTr); } while(0)
#define lz_copywordsteptoend(d,s,e) do { lz_copywordstep(d,s); } while ((d)<(e))
// lz_copysteptoend_overrunok
// NOTE : unlike memcpy, adjusts dest pointer to end !
#define lz_copysteptoend_overrunok(d,s,l) do { U8 * e=(d)+(l); lz_copywordsteptoend(d,s,e); d=e; } while(0)
//=======================================
#define LZB_PutExcessBW(cp,val) do { \
if ( val < 192 ) *cp++ = (U8) val; \
else { val -= 192; *cp++ = 192 + (U8) ( val&0x3F); val >>= 6; \
if ( val < 128 ) *cp++ = (U8) val; \
else { val -= 128; *cp++ = 128 + (U8) ( val&0x7F); val >>= 7; \
if ( val < 128 ) *cp++ = (U8) val; \
else { val -= 128; *cp++ = 128 + (U8) ( val&0x7F); val >>= 7; \
if ( val < 128 ) *cp++ = (U8) val; \
else { val -= 128; *cp++ = 128 + (U8) ( val&0x7F); val >>= 7; *cp++ = (U8) val; } } } } \
} while(0)
// max bytes consumed: 5
#define LZB_AddExcessBW(cp,val) do { U32 b = *cp++; \
if ( b < 192 ) val += b; \
else { val += 192; val += (b-192); b = *cp++; \
val += (b<<6); if ( b >= 128 ) { b = *cp++; \
val += (b<<13); if ( b >= 128 ) { b = *cp++; \
val += (b<<20); if ( b >= 128 ) { b = *cp++; \
val += (b<<27); } } } } \
} while(0)
#define LZB_PutExcessLRL(cp,val) LZB_PutExcessBW(cp,val)
#define LZB_PutExcessML(cp,val) LZB_PutExcessBW(cp,val)
#define LZB_AddExcessLRL(cp,val) LZB_AddExcessBW(cp,val)
#define LZB_AddExcessML(cp,val) LZB_AddExcessBW(cp,val)
//=============================================================================
// match copies :
// used for LRL :
static OOINLINE void copy_no_overlap_long(U8 * to, const U8 * from, SINTr length)
{
for(int i=0;i<length;i+=8)
write64(to+i, read64(from+i));
}
static OOINLINE void copy_no_overlap_nooverrun(U8 * to, const U8 * from, SINTr length)
{
// used for final LRL of every block
// must not overrun
memmove(to,from,(size_t)length);
}
RR_COMPILER_ASSERT( LZB_MLCONTROL_ESCAPE == 15 );
RR_COMPILER_ASSERT( LZB_MATCHLEN_ESCAPE == 19 );
static OOINLINE void copy_match_short_overlap(U8 * to, const U8 * from, SINTr ml)
{
RR_ASSERT( ml >= LZB_MML && ml < LZB_MATCHLEN_ESCAPE );
// overlap
// @@ err not awesome
to[0] = from[0];
to[1] = from[1];
to[2] = from[2];
to[3] = from[3];
to[4] = from[4];
to[5] = from[5];
to[6] = from[6];
to[7] = from[7];
if ( ml > 8 )
{
to += 8; from += 8; ml -= 8;
// max of 10 more
while(ml--)
{
*to++ = *from++;
}
}
}
static OOINLINE void copy_match_memset(U8 * to, int c, SINTr ml)
{
RR_ASSERT( ml >= 4 );
U32 four = c * 0x01010101;
U8 * end = to + ml;
write32(to, four); to += 4;
while(to<end)
{
write32(to, four); to += 4;
}
}
//=============================================================================
static SINTa rr_lzb_simple_decode_notexpanded(const void * comp, void * raw, SINTa rawLen)
{
U8 * rp = (U8 *)raw;
U8 * rpEnd = rp+rawLen;
const U8 * cp = (const U8 *)comp;
for(;;)
{
RR_ASSERT( rp < rpEnd );
// max bytes consumed (fast paths):
// - 1 control
// - lits:
// * 15 lits OR
// * 5 excess lrl + long lit run
// - match:
// * 2 match offset (short match) OR
// * 1 excess code + 5 excess ML (overlap match) OR
// * 1 excess code + 5 excess ML (long match)
//
// need near-end checks mainly on long lit runs.
UINTr control = *cp++;
UINTr lrl = control & 0xF;
UINTr ml_control = (control>>4);
// copy 4 literals speculatively :
write32( rp , read32(cp) );
//RR_ASSERT( lrl >= 8 || ml_control >= 8 );
if ( lrl > 4 )
{
// if lrl was <= 8 we did it, else need this :
if_unlikely ( lrl > 8 )
{
if_unlikely ( lrl >= LZB_LRL_ESCAPE )
{
LZB_AddExcessLRL( cp, lrl );
// hide the EOF check here ?
// has to be after the GetExcess
if_unlikely ( rp+lrl >= rpEnd )
{
RR_ASSERT( rp+lrl == rpEnd );
copy_no_overlap_nooverrun(rp,cp,lrl);
rp += lrl;
cp += lrl;
break;
}
else
{
// total undo of the previous copy
copy_no_overlap_long(rp,cp,lrl);
}
}
else // > 8 but not 0xF
{
// hide the EOF check here ?
if_unlikely ( rp+lrl >= rpEnd )
{
if ( lrl == 9 )
{
// may be a false 9
lrl = rrPtrDiff32( rpEnd - rp );
}
RR_ASSERT( rp+lrl == rpEnd );
copy_no_overlap_nooverrun(rp,cp,lrl);
rp += lrl;
cp += lrl;
break;
}
else
{
write32( rp+4 , read32(cp+4) );
// put 8 more :
write64( (rp+8) , read64((cp+8)) );
}
}
}
else
{
write32( rp+4 , read32(cp+4) );
}
}
rp += lrl;
cp += lrl;
RR_ASSERT( rp+LZB_MML <= rpEnd );
UINTr ml = ml_control + LZB_MML;
// speculatively grab offset but don't advance cp yet
UINTr off = RR_GET16_LE_UNALIGNED(cp);
if ( ml_control <= 8 )
{
cp += 2; // consume offset
const U8 * match = rp - off;
RR_ASSERT( ml <= 12 );
write64( rp , read64(match) );
write32( rp+8 , read32(match+8) );
rp += ml;
continue;
}
else
{
if_likely( ml_control < LZB_MLCONTROL_ESCAPE ) // short match
{
cp += 2; // consume offset
const U8 * match = rp - off;
RR_ASSERT( off >= 8 || ml <= off );
write64( rp , read64(match) );
write64( rp+8 , read64(match+8) );
if ( ml > 16 )
{
write16( rp+16, read16(match+16) );
}
}
else
{
// get 1-byte excess code
UINTr excesslow = off&127;
cp++; // consume 1
//if ( excess1 >= 128 )
if ( off & 128 )
{
ml_control = excesslow >> 3;
ml = ml_control + LZB_MML;
if ( ml_control == 0xF )
{
// get more ml
LZB_AddExcessML( cp, ml );
}
UINTr myoff = off & 7;
// low offset, can't do 8-byte grabs
if ( myoff == 1 )
{
int c = rp[-1];
copy_match_memset(rp,c,ml);
}
else
{
// shit but whatever, very rare
for(UINTr i=0;i<ml;i++)
{
rp[i] = rp[i-myoff];
}
}
}
else
{
UINTr myoff = RR_GET16_LE_UNALIGNED(cp); cp += 2;
const U8 * match = rp - myoff;
ml += excesslow;
if ( excesslow == 127 )
{
// get more ml
LZB_AddExcessML( cp, ml );
}
// 8-byte copier :
copy_no_overlap_long(rp,match,ml);
}
}
rp += ml;
}
}
RR_ASSERT( rp == rpEnd );
SINTa used = rrPtrDiff( cp - (const U8 *)comp );
RR_ASSERT( used < rawLen );
return used;
}
SINTa rr_lzb_simple_decode(const void * comp, SINTa compLen, void * raw, SINTa rawLen)
{
RR_ASSERT_ALWAYS( compLen <= rawLen );
if ( compLen == rawLen )
{
memcpy(raw,comp,rawLen);
return compLen;
}
return rr_lzb_simple_decode_notexpanded(comp,raw,rawLen);
}
//=====================================================
static RADINLINE U32 hmf_hash4_32(U32 ptr32)
{
U32 h = ( ptr32 * 2654435761u );
h ^= (h>>13);
return h;
}
#define HashMatchFinder_Hash32 hmf_hash4_32
//=================================================================================
#define LZB_Hash4 hmf_hash4_32
static RADINLINE U32 LZB_SecondHash4(U32 be4)
{
const U32 m = 0x5bd1e995;
U32 h = be4 * m;
h += (h>>11);
return h;
}
//=============================================
static int RADFORCEINLINE GetNumBytesZeroNeverAllR(UINTr x)
{
RR_ASSERT( x != 0 );
#if defined(__RADBIGENDIAN__)
// big endian, so earlier bytes are at the top
int nb = (int)rrClzBytesR(x);
#elif defined(__RADLITTLEENDIAN__)
// little endian, so earlier bytes are at the bottom
int nb = (int)rrCtzBytesR(x);
#else
#error wtf no endian set
#endif
RR_ASSERT( nb >= 0 && nb < (int)sizeof(UINTr) );
return nb;
}
//===============================
static RADFORCEINLINE U8 * LZB_Output(U8 * cp, S32 lrl, const U8 * literals, S32 matchlen , S32 mo )
{
RR_ASSERT( lrl >= 0 );
RR_ASSERT( matchlen >= LZB_MML );
RR_ASSERT( mo > 0 && mo <= LZB_MAX_OFFSET );
//rrprintf("[%3d][%3d][%7d]\n",lrl,ml,mo);
S32 sendml = matchlen - LZB_MML;
U32 ml_in_control = RR_MIN(sendml,LZB_MLCONTROL_ESCAPE);
if ( mo >= 8 ) // no overlap
{
if ( lrl < LZB_LRL_ESCAPE )
{
U32 control = lrl | (ml_in_control<<4);
*cp++ = (U8) control;
write64(cp, read64(literals));
if ( lrl > 8 )
{
write64(cp+8, read64(literals+8));
}
cp += lrl;
}
else
{
U32 control = LZB_LRL_ESCAPE | (ml_in_control<<4);
*cp++ = (U8) control;
U32 lrl_excess = lrl - LZB_LRL_ESCAPE;
LZB_PutExcessLRL(cp,lrl_excess);
// @@ ? is this okay for overrun ?
lz_copysteptoend_overrunok(cp,literals,lrl);
}
if ( ml_in_control < LZB_MLCONTROL_ESCAPE )
{
RR_ASSERT( (U16)(mo) == mo );
RR_PUT16_LE_UNALIGNED(cp,(U16)(mo));
cp += 2;
}
else
{
U32 ml_excess = sendml - LZB_MLCONTROL_ESCAPE;
// put special first byte, then offset, then remainder
if ( ml_excess < 127 )
{
*cp++ = (U8)ml_excess;
RR_ASSERT( (U16)(mo) == mo );
RR_PUT16_LE_UNALIGNED(cp,(U16)(mo));
cp += 2;
}
else
{
*cp++ = (U8)127;
RR_ASSERT( (U16)(mo) == mo );
RR_PUT16_LE_UNALIGNED(cp,(U16)(mo));
cp += 2;
ml_excess -= 127;
LZB_PutExcessML(cp,ml_excess);
}
}
}
else
{
U32 lrl_in_control = RR_MIN(lrl,LZB_LRL_ESCAPE);
// overlap case
U32 control = (lrl_in_control) | (LZB_MLCONTROL_ESCAPE<<4);
*cp++ = (U8) control;
if ( lrl_in_control == LZB_LRL_ESCAPE )
{
U32 lrl_excess = lrl - LZB_LRL_ESCAPE;
LZB_PutExcessLRL(cp,lrl_excess);
}
lz_copysteptoend_overrunok(cp,literals,lrl);
//cp += lrl;
// special excess1 :
UINTr excess1 = 128 + (ml_in_control<<3) + mo;
RR_ASSERT( excess1 < 256 );
*cp++ = (U8)excess1;
if ( ml_in_control == LZB_MLCONTROL_ESCAPE )
{
U32 ml_excess = sendml - LZB_MLCONTROL_ESCAPE;
LZB_PutExcessML(cp,ml_excess);
}
}
return cp;
}
#if LZB_FORCELASTLRL9
static RADINLINE U8 * LZB_OutputLast(U8 * cp, S32 lrl, const U8 * literals )
{
RR_ASSERT( lrl >= 0 );
//U32 ml = 0;
//U32 mo = 0;
U32 lrl_in_control = RR_MIN(lrl,LZB_LRL_ESCAPE);
#if LZB_END_WITH_LITERALS
// lrl_in_control must be at least 9
lrl_in_control = RR_MAX(lrl_in_control,9);
#endif
U32 control = lrl_in_control;
*cp++ = (U8) control;
if ( lrl_in_control == LZB_LRL_ESCAPE )
{
U32 lrl_excess = lrl - LZB_LRL_ESCAPE;
LZB_PutExcessLRL(cp,lrl_excess);
}
memmove(cp,literals,lrl);
cp += lrl;
return cp;
}
#else
static RADINLINE U8 * LZB_OutputLast(U8 * cp, S32 lrl, const U8 * literals )
{
cp = LZB_Output(cp,lrl,literals,LZB_MML,1);
// remove the offset we put :
cp -= 2;
return cp;
}
#endif
//===============================================================
static void rr_lzb_simple_context_init(rr_lzb_simple_context * ctx) //, const void * base)
{
RR_ASSERT( ctx->m_tableSizeBits >= 12 && ctx->m_tableSizeBits <= 24 );
memset(ctx->m_hashTable,0,sizeof(U16)*((SINTa)1<<ctx->m_tableSizeBits));
}
//===============================================================
/*
#define FAST_HASH_DEPTH_SHIFT (1) // more depth = more & more compression,
#define DO_FAST_2ND_HASH // rate= 30.69 mb/s , 15451369 <- turning this off is the best way to get more speed and less compression
/*/
#define FAST_HASH_DEPTH_SHIFT (0)
#define DO_FAST_2ND_HASH
/**/
// lzt99, 24700820, 15475520, 16677179
//encode only : 0.880 seconds, 1.62 b/hc, rate= 28.08 mb/s
//#define FAST_HASH_DEPTH_SHIFT (1) // more depth = more & more compression, but slower
#define DO_FAST_UPDATE_MATCH_HASHES 1 // helps compression a lot , like 0.30
//#define DO_FAST_UPDATE_MATCH_HASHES 2 // helps compression a lot , like 0.30
#define DO_FAST_LAZY_MATCH // also helps a lot , like 0.15
#define DO_FAST_HASH_DWORD 1
#define FAST_MULTISTEP_LITERALS_SHIFT (5)
//-----------------------
// derived :
/*
#define FAST_HASH_BITS (FAST_HASH_TOTAL_BITS-FAST_HASH_DEPTH_SHIFT)
#define FAST_HASH_SIZE (1<<FAST_HASH_BITS)
#define FAST_HASH_MASK (FAST_HASH_SIZE-1)
*/
#undef FAST_HASH_DEPTH
#define FAST_HASH_DEPTH (1<<FAST_HASH_DEPTH_SHIFT)
/*
#if FAST_HASH_DEPTH == 1
#error nope
#endif
*/
#undef FAST_HASH_CYCLE_MASK
#define FAST_HASH_CYCLE_MASK (FAST_HASH_DEPTH-1)
#undef FAST_HASH_INDEX
#if FAST_HASH_DEPTH > 1
#define FAST_HASH_INDEX(h,d) ( ((h)<<FAST_HASH_DEPTH_SHIFT) + (d) )
#else
#define FAST_HASH_INDEX(h,d) (h)
#endif
#undef FAST_HASH_FUNC
#define FAST_HASH_FUNC(ptr,dword) ( LZB_Hash4(dword) & hash_table_mask )
static SINTa rr_lzb_simple_encode_fast_sub(rr_lzb_simple_context * fh,
const void * raw, SINTa rawLen, void * comp)
{
//SIMPLEPROFILE_SCOPE_N(lzbfast_sub,rawLen);
//THREADPROFILEFUNC();
U8 * cp = (U8 *)comp;
U8 * compExpandedPtr = cp + rawLen - 8;
const U8 * rp = (const U8 *)raw;
const U8 * rpEnd = rp+rawLen;
const U8 * rpMatchEnd = rpEnd - LZB_END_OF_BLOCK_NO_MATCH_ZONE;
const U8 * rpEndSafe = rpMatchEnd - LZB_MML;
if ( rpEndSafe <= (U8 *)raw )
{
// can't compress
return rawLen+1;
}
const U8 * literals_start = rp;
#if FAST_HASH_DEPTH > 1
int hashCycle = 0;
#endif
U16 * hashTable16 = fh->m_hashTable;
int hashTableSizeBits = fh->m_tableSizeBits;
U32 hash_table_mask = (U32)((1UL<<(hashTableSizeBits - FAST_HASH_DEPTH_SHIFT)) - 1);
const U8 * zeroPosPtr = (const U8 *)raw;
// first byte is always a literal
rp++;
for(;;)
{
S32 matchOff;
UINTr failedMatches = (1<<FAST_MULTISTEP_LITERALS_SHIFT) + 3;
U32 rp32 = read32(rp);
U32 hash = FAST_HASH_FUNC(rp, rp32 );
SINTa curpos;
const U8 * hashrp;
#ifdef DO_FAST_2ND_HASH
U32 hash2;
#endif
// literals :
for(;;)
{
curpos = rrPtrDiff(rp - zeroPosPtr);
RR_ASSERT( curpos >= 0 );
#ifdef DO_FAST_2ND_HASH
hash2 = ( LZB_SecondHash4(rp32) ) & hash_table_mask;
#endif
#if FAST_HASH_DEPTH > 1
for(int d=0;d<FAST_HASH_DEPTH;d++)
#endif
{
U16 hashpos16 = hashTable16[ FAST_HASH_INDEX(hash,d) ];
matchOff = (U16)(curpos - hashpos16);
RR_ASSERT( matchOff >= 0 );
hashrp = rp - matchOff;
//if ( matchOff <= LZB_MAX_OFFSET )
RR_ASSERT( matchOff <= LZB_MAX_OFFSET );
{
const U32 hashrp32 = read32(hashrp);
if ( rp32 == hashrp32 && matchOff != 0 )
{
goto found_match;
}
}
}
#ifdef DO_FAST_2ND_HASH
#if FAST_HASH_DEPTH > 1
for(int d=0;d<FAST_HASH_DEPTH;d++)
#endif
{
U16 hashpos16 = hashTable16[ FAST_HASH_INDEX(hash2,d) ];
matchOff = (U16)(curpos - hashpos16);
RR_ASSERT( matchOff >= 0 );
hashrp = rp - matchOff;
RR_ASSERT( matchOff <= LZB_MAX_OFFSET );
{
const U32 hashrp32 = read32(hashrp);
if ( rp32 == hashrp32 && matchOff != 0 )
{
goto found_match;
}
}
}
#endif
//---------------------------
// update hash :
hashTable16[ FAST_HASH_INDEX(hash,hashCycle) ] = (U16) curpos;
#ifdef DO_FAST_2ND_HASH
// do NOT step hashCycle !
//hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK;
hashTable16[ FAST_HASH_INDEX(hash2,hashCycle) ] = (U16) curpos;
#endif
#if FAST_HASH_DEPTH > 1
hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK;
#endif
UINTr stepLiterals = (failedMatches>>FAST_MULTISTEP_LITERALS_SHIFT);
RR_ASSERT( stepLiterals >= 1 );
++failedMatches;
rp += stepLiterals;
if ( rp >= rpEndSafe )
goto done;
rp32 = read32(rp);
hash = FAST_HASH_FUNC(rp, rp32 );
}
//-------------------------------
found_match:
// found something
//-------------------------
// update hash now so lazy can see it :
#if 1 // pretty important to compression
hashTable16[ FAST_HASH_INDEX(hash,hashCycle) ] = (U16) curpos;
#ifdef DO_FAST_2ND_HASH
// do NOT step hashCycle !
//hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK;
hashTable16[ FAST_HASH_INDEX(hash2,hashCycle) ] = (U16) curpos;
#endif
#if FAST_HASH_DEPTH > 1
hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK;
#endif
#endif
//-----------------------------------
const U8 * match_start = rp;
rp += 4;
while( rp < rpEndSafe )
{
UINTr big1 = readR(rp);
UINTr big2 = readR(rp-matchOff);
if ( big1 == big2 )
{
rp += RAD_PTRBYTES;
continue;
}
else
{
rp += GetNumBytesZeroNeverAllR(big1^big2);
break;
}
}
rp = RR_MIN(rp,rpMatchEnd);
//-------------------------------
// rp is now at the *end* of the match
//-------------------------------
// check lazy match too
#ifdef DO_FAST_LAZY_MATCH
if (rp< rpEndSafe)
{
const U8 * lazyrp = match_start + 1;
//SINTa lazypos = rrPtrDiff(lazyrp - zeroPosPtr);
SINTa lazypos = curpos + 1;
RR_ASSERT( lazypos == rrPtrDiff(lazyrp - zeroPosPtr) );
U32 lazyrp32 = read32(lazyrp);
const U8 * lazyhashrp;
SINTa lazymatchOff;
U32 lazyHash = FAST_HASH_FUNC(lazyrp, lazyrp32 );
#ifdef DO_FAST_2ND_HASH
U32 lazyhash2 = LZB_SecondHash4(lazyrp32) & hash_table_mask;
#endif
#if FAST_HASH_DEPTH > 1
for(int d=0;d<FAST_HASH_DEPTH;d++)
#endif
{
U16 hashpos16 = hashTable16[ FAST_HASH_INDEX(lazyHash,d) ];
lazymatchOff = (U16)(lazypos - hashpos16);
RR_ASSERT( lazymatchOff >= 0 );
RR_ASSERT( lazymatchOff <= LZB_MAX_OFFSET );
{
lazyhashrp = lazyrp - lazymatchOff;
const U32 hashrp32 = read32(lazyhashrp);
if ( lazyrp32 == hashrp32 && lazymatchOff != 0 )
{
goto lazy_found_match;
}
}
}
#ifdef DO_FAST_2ND_HASH
#if FAST_HASH_DEPTH > 1
for(int d=0;d<FAST_HASH_DEPTH;d++)
#endif
{
U16 hashpos16 = hashTable16[ FAST_HASH_INDEX(lazyhash2,d) ];
lazymatchOff = (U16)(lazypos - hashpos16);
RR_ASSERT( lazymatchOff >= 0 );
RR_ASSERT( lazymatchOff <= LZB_MAX_OFFSET );
{
lazyhashrp = lazyrp - lazymatchOff;
const U32 hashrp32 = read32(lazyhashrp);
if ( lazyrp32 == hashrp32 && lazymatchOff != 0 )
{
goto lazy_found_match;
}
}
}
#endif
if ( 0 )
{
lazy_found_match:
lazyrp += 4;
while( lazyrp < rpEndSafe )
{
UINTr big1 = readR(lazyrp);
UINTr big2 = readR(lazyrp-lazymatchOff);
if ( big1 == big2 )
{
lazyrp += RAD_PTRBYTES;
continue;
}
else
{
lazyrp += GetNumBytesZeroNeverAllR(big1^big2);
break;
}
}
lazyrp = RR_MIN(lazyrp,rpMatchEnd);
//S32 lazymatchLen = rrPtrDiff32( lazyrp - (match_start+1) );
//RR_ASSERT( lazymatchLen >= 4 );
if ( lazyrp >= rp+3 )
{
// yes take the lazy match
// put a literal :
match_start++;
// I had a bug where lazypos was set wrong for the hash fill
// it set it to the *end* of the normal match
// and for some reason that helped compression WTF WTF
//SINTa lazypos = rrPtrDiff(rp - zeroPosPtr); // 233647528
// with correct lazypos : 233651228
// really this shouldn't be necessary at all
// because I do an update of hash at all positions in the match including first!
#if 1 // with update disabled - 233690274
hashTable16[ FAST_HASH_INDEX(lazyHash,hashCycle) ] = (U16) lazypos;
#ifdef DO_FAST_2ND_HASH
// do NOT step hashCycle !
hashTable16[ FAST_HASH_INDEX(lazyhash2,hashCycle) ] = (U16) lazypos;
#endif
#if FAST_HASH_DEPTH > 1
hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK;
#endif
#endif
// and then drop out and do the lazy match :
//matchLen = lazymatchLen;
matchOff = (S32)lazymatchOff;
rp = lazyrp;
hashrp = lazyhashrp;
}
}
}
#endif
//---------------------------------------------------
// back up start of match that we missed due to stepLiterals !
// make sure we don't read off the start of the array
// this costs a little speed and gains a little compression
// 15662162 at 121.58 mb/s
// 15776473 at 127.92 mb/s
#if 1
/*
lzbf : 24,700,820 ->15,963,503 = 5.170 bpb = 1.547 to 1
encode : 0.171 seconds, 83.60 b/kc, rate= 144.54 M/s
decode : 0.014 seconds, 1002.64 b/kc, rate= 1733.57 M/s
*/
{
// 144 M/s
// back up start of match that we missed
// make sure we don't read off the start of the array
const U8 * rpm1 = match_start-1;
if ( rpm1 >= literals_start && hashrp > zeroPosPtr && rpm1[0] == hashrp[-1] )
{
rpm1--; hashrp-= 2;
while ( rpm1 >= literals_start && hashrp >= zeroPosPtr && rpm1[0] == *hashrp )
{
rpm1--;
hashrp--;
}
match_start = rpm1+1;
//rp = RR_MAX(rp,literals_start);
RR_ASSERT( match_start >= literals_start );
}
}
#endif
S32 matchLen = rrPtrDiff32( rp - match_start );
RR_ASSERT( matchLen >= 4 );
//===============================================
// chose a match
// output LRL (if any) and match
S32 cur_lrl = rrPtrDiff32(match_start - literals_start);
// catch expansion while writing :
if_unlikely ( cp+cur_lrl >= compExpandedPtr )
{
return rawLen+1;
}
cp = LZB_Output(cp,cur_lrl,literals_start,matchLen,matchOff);
// skip the match :
literals_start = rp;
if ( rp >= rpEndSafe )
break;
// step & update hashes :
// (I already did cur pos)
#ifdef DO_FAST_UPDATE_MATCH_HASHES
// don't bother if it takes us to the end :
// (this check is not for speed it's to avoid the access violation)
const U8 * ptr = match_start+1;
U16 pos16 = (U16) rrPtrDiff( ptr - zeroPosPtr );
for(;ptr<rp;ptr++)
{
U32 hash_result = FAST_HASH_FUNC( ptr, read32(ptr) );
hashTable16[ FAST_HASH_INDEX(hash_result,hashCycle) ] = pos16; pos16++;
//hashCycle = (hashCycle+1)&FAST_HASH_CYCLE_MASK;
// helps a bit to NOT step cycle here
// the hash entries that come inside a match are of much lower quality
}
#endif
}
done:;
int cur_lrl = rrPtrDiff32(rpEnd - literals_start);
#if LZB_END_WITH_LITERALS
RR_ASSERT_ALWAYS(cur_lrl > 0 );
#endif
if ( cur_lrl > 0 )
{
// catch expansion while writing :
if ( cp+cur_lrl >= compExpandedPtr )
{
return rawLen+1;
}
cp = LZB_OutputLast(cp,cur_lrl,literals_start);
}
SINTa compLen = rrPtrDiff( cp - (U8 *)comp );
return compLen;
}
SINTa rr_lzb_simple_encode_fast(rr_lzb_simple_context * fh,
const void * raw, SINTa rawLen, void * comp)
{
rr_lzb_simple_context_init(fh); //,raw);
SINTa comp_len = rr_lzb_simple_encode_fast_sub(fh,raw,rawLen,comp);
if ( comp_len >= rawLen )
{
memcpy(comp,raw,rawLen);
return rawLen;
}
return comp_len;
}
#undef FAST_HASH_DEPTH_SHIFT
#undef DO_FAST_UPDATE_MATCH_HASHES
#undef DO_FAST_LAZY_MATCH
#undef DO_FAST_2ND_HASH
//=====================================================
#define FAST_HASH_DEPTH_SHIFT (0)
#undef FAST_MULTISTEP_LITERALS_SHIFT
#define FAST_MULTISTEP_LITERALS_SHIFT (4)
//-----------------------
// derived :
RR_COMPILER_ASSERT( FAST_HASH_DEPTH_SHIFT == 0 );
#undef FAST_HASH_FUNC
//#define FAST_HASH_FUNC(ptr,dword) ( LZB_Hash4(dword) & hash_table_mask )
#define FAST_HASH_FUNC(ptr,dword) ( (((dword)*2654435761U)>>16) & hash_table_mask )
// @@@@ ????
#define LZBVF_DO_BACKUP 0
//#define LZBVF_DO_BACKUP 1
static SINTa rr_lzb_simple_encode_veryfast_sub(rr_lzb_simple_context * fh,
const void * raw, SINTa rawLen, void * comp)
{
//SIMPLEPROFILE_SCOPE_N(lzbfast_sub,rawLen);
//THREADPROFILEFUNC();
U8 * cp = (U8 *)comp;
U8 * compExpandedPtr = cp + rawLen - 8;
const U8 * rp = (const U8 *)raw;
const U8 * rpEnd = rp+rawLen;
// we can match up to rpEnd
// but matches can't start past rpEndSafe
const U8 * rpMatchEnd = rpEnd - LZB_END_OF_BLOCK_NO_MATCH_ZONE;
const U8 * rpEndSafe = rpMatchEnd - LZB_MML;
if ( rpEndSafe <= (U8 *)raw )
{
// can't compress
return rawLen+1;
}
const U8 * literals_start = rp;
U16 * hashTable16 = fh->m_hashTable;
int hashTableSizeBits = fh->m_tableSizeBits;
U32 hash_table_mask = (U32)((1UL<<(hashTableSizeBits)) - 1);
const U8 * zeroPosPtr = (const U8 *)raw;
// first byte is always a literal
rp++;
for(;;)
{
U32 rp32 = read32(rp);
U32 hash = FAST_HASH_FUNC(rp, rp32 );
const U8 * hashrp;
S32 matchOff;
UINTr failedMatches;
// loop while no match found :
// first loop with step = 1
// @@
//int step1count = (1<<FAST_MULTISTEP_LITERALS_SHIFT); // full count
int step1count = (1<<(FAST_MULTISTEP_LITERALS_SHIFT-1)); // half count
while(step1count--)
{
SINTa curpos = rrPtrDiff(rp - zeroPosPtr);
RR_ASSERT( curpos >= 0 );
U16 hashpos16 = hashTable16[hash];
hashTable16[ hash ] = (U16) curpos;
matchOff = (U16)(curpos - hashpos16);
RR_ASSERT( matchOff >= 0 && matchOff <= LZB_MAX_OFFSET );
hashrp = rp - matchOff;
const U32 hashrp32 = read32(hashrp);
if ( rp32 == hashrp32 && matchOff != 0 )
{
goto found_match;
}
if ( ++rp >= rpEndSafe )
goto done;
rp32 = read32(rp);
hash = FAST_HASH_FUNC(rp, rp32 );
}
// step starts at 2 :
failedMatches = (2<<FAST_MULTISTEP_LITERALS_SHIFT);
for(;;)
{
SINTa curpos = rrPtrDiff(rp - zeroPosPtr);
RR_ASSERT( curpos >= 0 );
U16 hashpos16 = hashTable16[hash];
hashTable16[ hash ] = (U16) curpos;
matchOff = (U16)(curpos - hashpos16);
RR_ASSERT( matchOff >= 0 && matchOff <= LZB_MAX_OFFSET );
hashrp = rp - matchOff;
const U32 hashrp32 = read32(hashrp);
if ( rp32 == hashrp32 && matchOff != 0 )
{
goto found_match;
}
UINTr stepLiterals = (failedMatches>>FAST_MULTISTEP_LITERALS_SHIFT);
RR_ASSERT( stepLiterals >= 1 );
++failedMatches;
rp += stepLiterals;
if ( rp >= rpEndSafe )
goto done;
rp32 = read32(rp);
hash = FAST_HASH_FUNC(rp, rp32 );
}
//-------------------------------
found_match:;
// found something
#if LZBVF_DO_BACKUP
// alternative backup using counter :
S32 cur_lrl = rrPtrDiff32(rp - literals_start);
int neg_max_backup = - RR_MIN(cur_lrl , rrPtrDiff32(hashrp - zeroPosPtr) );
int neg_backup = -1;
if( neg_backup >= neg_max_backup && rp[neg_backup] == hashrp[neg_backup] )
{
neg_backup--;
while( neg_backup >= neg_max_backup && rp[neg_backup] == hashrp[neg_backup] )
{
neg_backup--;
}
neg_backup++;
rp += neg_backup;
cur_lrl += neg_backup;
RR_ASSERT( cur_lrl >= 0 );
RR_ASSERT( cur_lrl == rrPtrDiff32(rp - literals_start) );
}
#else
S32 cur_lrl = rrPtrDiff32(rp - literals_start);
#endif
// catch expansion while writing :
if_unlikely ( cp+cur_lrl >= compExpandedPtr )
{
return rawLen+1;
}
RR_ASSERT( matchOff >= 1 );
//---------------------------------------
// find rest of match len
// save pointer to start of match
// walk rp ahead to end of match
const U8 * match_start = rp;
rp += 4;
while( rp < rpEndSafe )
{
UINTr big1 = readR(rp);
UINTr big2 = readR(rp-matchOff);
if ( big1 == big2 )
{
rp += RAD_PTRBYTES;
continue;
}
else
{
rp += GetNumBytesZeroNeverAllR(big1^big2);
break;
}
}
rp = RR_MIN(rp,rpMatchEnd);
S32 matchLen = rrPtrDiff32( rp - match_start );
//===============================================
// chose a match
// output LRL (if any) and match
cp = LZB_Output(cp,cur_lrl,literals_start,matchLen,matchOff);
// skip the match :
literals_start = rp;
if ( rp >= rpEndSafe )
goto done;
}
done:;
int cur_lrl = rrPtrDiff32(rpEnd - literals_start);
#if LZB_END_WITH_LITERALS
RR_ASSERT_ALWAYS(cur_lrl > 0 );
#endif
if ( cur_lrl > 0 )
{
// catch expansion while writing :
if ( cp+cur_lrl >= compExpandedPtr )
{
return rawLen+1;
}
cp = LZB_OutputLast(cp,cur_lrl,literals_start);
}
SINTa compLen = rrPtrDiff( cp - (U8 *)comp );
return compLen;
}
SINTa rr_lzb_simple_encode_veryfast(rr_lzb_simple_context * fh,
const void * raw, SINTa rawLen, void * comp)
{
rr_lzb_simple_context_init(fh); //,raw);
SINTa comp_len = rr_lzb_simple_encode_veryfast_sub(fh,raw,rawLen,comp);
if ( comp_len >= rawLen )
{
memcpy(comp,raw,rawLen);
return rawLen;
}
return comp_len;
}
#undef FAST_HASH_DEPTH_SHIFT
#undef DO_FAST_UPDATE_MATCH_HASHES
#undef DO_FAST_LAZY_MATCH
#undef DO_FAST_2ND_HASH
//=====================================================
// vim:noet:sw=4:ts=4