mirror of
https://github.com/Ed94/raddebugger.git
synced 2026-06-13 07:32:23 -07:00
fix radsort multi-inclusion
This commit is contained in:
Vendored
+69
-64
@@ -1,5 +1,8 @@
|
||||
// New radsort.
|
||||
|
||||
#if !defined(RADSORT_H)
|
||||
#define RADSORT_H
|
||||
|
||||
// To Use:
|
||||
// Create a less_than function and then call radsort.
|
||||
//
|
||||
@@ -49,7 +52,7 @@ typedef struct bytes8 { char b[8]; } bytes8;
|
||||
|
||||
static RSFORCEINLINE void radsortswapper( void * a, void * b, size_t size )
|
||||
{
|
||||
#define RSSWAPMEM(type) ( size >= sizeof(type) ) { type v = *(type const*)a; *(type*)a = *(type const*)b; *(type*)b = v; a=rsadd_ptr(a,sizeof(type)); b=rsadd_ptr(b,sizeof(type)); size -= sizeof(type); }
|
||||
#define RSSWAPMEM(type) ( size >= sizeof(type) ) { type v = *(type const*)a; *(type*)a = *(type const*)b; *(type*)b = v; a=rsadd_ptr(a,sizeof(type)); b=rsadd_ptr(b,sizeof(type)); size -= sizeof(type); }
|
||||
|
||||
while RSSWAPMEM(bytes64);
|
||||
if RSSWAPMEM(bytes32);
|
||||
@@ -59,13 +62,13 @@ static RSFORCEINLINE void radsortswapper( void * a, void * b, size_t size )
|
||||
if RSSWAPMEM(short);
|
||||
if RSSWAPMEM(char);
|
||||
|
||||
#undef RSSWAPMEM
|
||||
#undef RSSWAPMEM
|
||||
}
|
||||
|
||||
// since size is always constant, this big function compiles down to 4 to 12 instructions (for normal structs 4-6)
|
||||
static RSFORCEINLINE void radsortmover( void * a, void * b, size_t size )
|
||||
{
|
||||
#define RSMOVEMEM(type) ( size >= sizeof(type) ) { *(type*)a = *(type const*)b; a=rsadd_ptr(a,sizeof(type)); b=rsadd_ptr(b,sizeof(type)); size -= sizeof(type); }
|
||||
#define RSMOVEMEM(type) ( size >= sizeof(type) ) { *(type*)a = *(type const*)b; a=rsadd_ptr(a,sizeof(type)); b=rsadd_ptr(b,sizeof(type)); size -= sizeof(type); }
|
||||
|
||||
while RSMOVEMEM(bytes64);
|
||||
if RSMOVEMEM(bytes32);
|
||||
@@ -75,7 +78,7 @@ static RSFORCEINLINE void radsortmover( void * a, void * b, size_t size )
|
||||
if RSMOVEMEM(short);
|
||||
if RSMOVEMEM(char);
|
||||
|
||||
#undef RSMOVEMEM
|
||||
#undef RSMOVEMEM
|
||||
}
|
||||
|
||||
// these macros generate tiny move/swap routines that don't go through the generic function above (mostly for debug build performance)
|
||||
@@ -110,17 +113,17 @@ typedef void rs_small_sort_func( void * left, size_t n, size_t element_size, is_
|
||||
typedef struct RS_MAX_BUBBLE_BUF { char b[RS_SMALL_FLIP_TO_INSERTION_GT_SIZE]; } RS_MAX_BUBBLE_BUF;
|
||||
|
||||
#define radsort( start, len, is_before_func ) \
|
||||
do { \
|
||||
char __rs_tmp[ sizeof( (start)[0] ) ]; \
|
||||
radsortinternal( start, len, sizeof( (start)[0] ), \
|
||||
is_before_func, \
|
||||
radsortswapsize( sizeof( (start)[0] ) ), \
|
||||
radsortmovesize( sizeof( (start)[0] ) ), \
|
||||
( sizeof( (start)[0] ) > RS_SMALL_FLIP_TO_INSERTION_GT_SIZE ) ? radinsertionsort : radbubble2sort, \
|
||||
( sizeof( (start)[0] ) > RS_SMALL_FLIP_TO_INSERTION_GT_SIZE ) ? RSS_FLIP_TO_SMALL_SORT_INSERTION : RSS_FLIP_TO_SMALL_SORT_BUBBLE2, \
|
||||
&__rs_tmp \
|
||||
); \
|
||||
} while (0)
|
||||
do { \
|
||||
char __rs_tmp[ sizeof( (start)[0] ) ]; \
|
||||
radsortinternal( start, len, sizeof( (start)[0] ), \
|
||||
is_before_func, \
|
||||
radsortswapsize( sizeof( (start)[0] ) ), \
|
||||
radsortmovesize( sizeof( (start)[0] ) ), \
|
||||
( sizeof( (start)[0] ) > RS_SMALL_FLIP_TO_INSERTION_GT_SIZE ) ? radinsertionsort : radbubble2sort, \
|
||||
( sizeof( (start)[0] ) > RS_SMALL_FLIP_TO_INSERTION_GT_SIZE ) ? RSS_FLIP_TO_SMALL_SORT_INSERTION : RSS_FLIP_TO_SMALL_SORT_BUBBLE2, \
|
||||
&__rs_tmp \
|
||||
); \
|
||||
} while (0)
|
||||
#define radheapsort( start, len, is_before_func ) do { radheapsortinteral( start, len, sizeof( ((start)[0]) ), is_before_func, radsortswapsize( sizeof( ((start)[0]) ) ) ); } while (0)
|
||||
|
||||
|
||||
@@ -132,27 +135,27 @@ RSFORCEINLINE void radheapsortinteral( void * start, size_t len, size_t element_
|
||||
void * left;
|
||||
void * right;
|
||||
size_t length;
|
||||
|
||||
|
||||
left = start;
|
||||
right = rsadd_ptr_elements( start, len - 1 );
|
||||
length = len;
|
||||
|
||||
|
||||
if ( length > 1 )
|
||||
{
|
||||
// unusual small in-place heap sort
|
||||
void * i; void * ind; void * v; void * n;
|
||||
size_t s, k;
|
||||
|
||||
|
||||
s = length >> 1;
|
||||
i = rsadd_ptr_elements( left, s );
|
||||
|
||||
|
||||
for(;;)
|
||||
{
|
||||
--s;
|
||||
i = rsadd_ptr_elements( i, -1 );
|
||||
ind = i;
|
||||
k = ( s << 1 ) + 1;
|
||||
|
||||
|
||||
for(;;)
|
||||
{
|
||||
v = rsadd_ptr_elements( left, k );
|
||||
@@ -163,21 +166,21 @@ RSFORCEINLINE void radheapsortinteral( void * start, size_t len, size_t element_
|
||||
++k;
|
||||
v = n;
|
||||
}
|
||||
|
||||
|
||||
if ( is_before( ind, v ) )
|
||||
{
|
||||
swapper( ind, v, element_size );
|
||||
ind = v;
|
||||
k = ( k << 1 ) + 1;
|
||||
|
||||
|
||||
if ( k < length )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// if s is non-zero, we are still building the heap!
|
||||
if ( s )
|
||||
break;
|
||||
|
||||
|
||||
swapper( left, right, element_size );
|
||||
right = rsadd_ptr_elements( right, -1 );
|
||||
ind = left;
|
||||
@@ -199,18 +202,18 @@ RSFORCEINLINE void radheapsortinteral( void * start, size_t len, size_t element_
|
||||
static RSFORCEINLINE void radsortgetmedian5( void * output, void * left, void * right, size_t length, size_t element_size, is_before_func * is_before, swap_func * swapper, move_func * mover )
|
||||
{
|
||||
RS_MAX_SIMPLE_BUF mb0,mb1,mb2,mb3,mb4;
|
||||
|
||||
|
||||
mover( &mb0, left, element_size );
|
||||
mover( &mb1, rsadd_ptr_elements( left, length >> 2 ), element_size );
|
||||
mover( &mb2, rsadd_ptr_elements( left, length >> 1 ), element_size );
|
||||
mover( &mb3, rsadd_ptr_elements( left, length - (length >> 2) ), element_size );
|
||||
mover( &mb4, right, element_size );
|
||||
|
||||
|
||||
// Basically, for simple compares, and for simple in-register types, this function
|
||||
// must turn into 7 compares and then 5-7 movs, and 12 cmovs. Any
|
||||
// compiler *should* do this - if this doesn't happen, then the compiler is
|
||||
// hosing you. You can put int 3s at the start and end of this function to check.
|
||||
|
||||
|
||||
rsswapsmaller( mb0, mb1 );
|
||||
rsswapsmaller( mb2, mb3 );
|
||||
rsswapsmaller( mb0, mb2 );
|
||||
@@ -226,8 +229,8 @@ static RSFORCEINLINE void radsortgetmedian5( void * output, void * left, void *
|
||||
static RSFORCEINLINE void radsortgetmedian9( void * output, void * left, void * right, size_t length, size_t element_size, is_before_func * is_before, swap_func * swapper, move_func * mover )
|
||||
{
|
||||
RS_MAX_SIMPLE_BUF mb0,mb1,mb2,mb3,mb4,mb5,mb6,mb7,mb8; // todo, temp mem!
|
||||
|
||||
#ifdef RS_PREFETCH
|
||||
|
||||
#ifdef RS_PREFETCH
|
||||
RS_PREFETCH( left );
|
||||
RS_PREFETCH( right );
|
||||
RS_PREFETCH( rsadd_ptr_elements( left, length >> 3 ) );
|
||||
@@ -237,8 +240,8 @@ static RSFORCEINLINE void radsortgetmedian9( void * output, void * left, void *
|
||||
RS_PREFETCH( rsadd_ptr_elements( left, (length >> 1) + (0 >> 3) ) );
|
||||
RS_PREFETCH( rsadd_ptr_elements( left, length - (length >> 2) ) );
|
||||
RS_PREFETCH( rsadd_ptr_elements( left, length - (length >> 3) ) );
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
mover( &mb0, left, element_size );
|
||||
mover( &mb1, rsadd_ptr_elements( left, length >> 3 ), element_size );
|
||||
mover( &mb2, rsadd_ptr_elements( left, length >> 2 ), element_size );
|
||||
@@ -248,7 +251,7 @@ static RSFORCEINLINE void radsortgetmedian9( void * output, void * left, void *
|
||||
mover( &mb6, rsadd_ptr_elements( left, length - (length >> 2) ), element_size );
|
||||
mover( &mb7, rsadd_ptr_elements( left, length - (length >> 3) ), element_size );
|
||||
mover( &mb8, right, element_size );
|
||||
|
||||
|
||||
// Basically, for simple compares, and for simple in-register types, this function
|
||||
// should turn into 19 compares and then 15-19 movs, and 36 cmovs. However,
|
||||
// most compilers only do a so-so job at this, and you'll end up with 3-4 jumps.
|
||||
@@ -272,7 +275,7 @@ static RSFORCEINLINE void radsortgetmedian9( void * output, void * left, void *
|
||||
rsswapsmaller( mb4, mb6 );
|
||||
rsswapsmaller( mb2, mb3 );
|
||||
rsswapsmaller( mb4, mb5 );
|
||||
|
||||
|
||||
mover( output, &mb3, element_size );
|
||||
if ( is_before( &mb4, &mb3 ) ) mover( output, &mb4, element_size );
|
||||
}
|
||||
@@ -301,13 +304,13 @@ static RSFORCEINLINE void radbubble2sort( void * left, size_t n, size_t element_
|
||||
void * i; // todo - test with bigger blocks
|
||||
void * s = rsadd_ptr_elements( left, 2 );
|
||||
RS_MAX_BUBBLE_BUF x, y, z;
|
||||
|
||||
#define rsbubbleswap( X, Y ) { int cond; cond = is_before( &Y, &X); mover( tmp, &X, element_size ); if ( cond ) mover( &X, &Y, element_size ); if ( cond ) mover( &Y, tmp, element_size ); }
|
||||
|
||||
|
||||
#define rsbubbleswap( X, Y ) { int cond; cond = is_before( &Y, &X); mover( tmp, &X, element_size ); if ( cond ) mover( &X, &Y, element_size ); if ( cond ) mover( &Y, tmp, element_size ); }
|
||||
|
||||
for ( i = rsadd_ptr_elements( left, (int)n - 1 ) ; i > left ; i = rsadd_ptr_elements( i, -2 ) )
|
||||
{
|
||||
void * j, * jm2;
|
||||
|
||||
|
||||
// load x & y
|
||||
mover( &x, left, element_size );
|
||||
mover( &y, rsadd_ptr_elements( left, 1 ), element_size );
|
||||
@@ -317,7 +320,7 @@ static RSFORCEINLINE void radbubble2sort( void * left, size_t n, size_t element_
|
||||
|
||||
// for ints, this loop needs to be 4 cmps, 6 cmovs, and 5 movs
|
||||
// anything else will kill performance
|
||||
|
||||
|
||||
jm2 = left;
|
||||
for ( j = s ; j <= i ; j = rsadd_ptr_elements( j, 1 ) )
|
||||
{
|
||||
@@ -329,7 +332,7 @@ static RSFORCEINLINE void radbubble2sort( void * left, size_t n, size_t element_
|
||||
mover( jm2, &z, element_size );
|
||||
jm2 = rsadd_ptr_elements( jm2, 1 );
|
||||
}
|
||||
|
||||
|
||||
mover( rsadd_ptr_elements( i, -1 ), &x, element_size );
|
||||
mover( i, &y, element_size );
|
||||
}
|
||||
@@ -340,7 +343,7 @@ static RSFORCEINLINE void radinsertionsort(void * start, size_t len, size_t elem
|
||||
{
|
||||
void * cur;
|
||||
void * prev;
|
||||
|
||||
|
||||
cur = rsadd_ptr_elements( start, 1 );
|
||||
--len;
|
||||
prev = start;
|
||||
@@ -424,25 +427,25 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
{
|
||||
void * left;
|
||||
size_t length;
|
||||
|
||||
|
||||
if ( len <= 1 )
|
||||
return;
|
||||
|
||||
#if _DEBUG
|
||||
if ( element_size > sizeof( RS_MAX_SIMPLE_BUF ) )
|
||||
__debugbreak();
|
||||
#endif
|
||||
|
||||
|
||||
#if _DEBUG
|
||||
if ( element_size > sizeof( RS_MAX_SIMPLE_BUF ) )
|
||||
__debugbreak();
|
||||
#endif
|
||||
|
||||
// stack for no recursion
|
||||
typedef struct stks
|
||||
{
|
||||
void * left;
|
||||
size_t len;
|
||||
} stks;
|
||||
|
||||
|
||||
stks stk[ RSS_MAX_RECURSE ];
|
||||
stks * stk_ptr = stk + RSS_MAX_RECURSE;
|
||||
|
||||
|
||||
// we use the stk_ptr to tell when to flip to heap.
|
||||
// when we hit the end of the stack, we heap it, so
|
||||
// back the start of the stack to log1.5 of len
|
||||
@@ -453,10 +456,10 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
length = ( length >> 1 ) + ( length >> 2 );
|
||||
} while ( length );
|
||||
stk_ptr[ -1 ].len = 0;
|
||||
|
||||
|
||||
left = start;
|
||||
length = len;
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
for(;;)
|
||||
@@ -484,11 +487,11 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
void * rightequalpiv;
|
||||
size_t leftlen;
|
||||
void * scan, * piv, * rend, * right;
|
||||
|
||||
|
||||
CompilerReset(left); // we reset the compiler before each major sort
|
||||
|
||||
|
||||
right = rsadd_ptr_elements( left, length - 1 );
|
||||
|
||||
|
||||
// check for and correct inverted blocks
|
||||
scan = left;
|
||||
rend = right;
|
||||
@@ -499,7 +502,7 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
rend = rsadd_ptr_elements( rend, -1 );
|
||||
if ( scan >= rend ) break;
|
||||
}
|
||||
|
||||
|
||||
// scan to see if the block is in order (or all the same)
|
||||
scan = left;
|
||||
do
|
||||
@@ -511,12 +514,12 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
} while ( scan < right );
|
||||
// if we get out of the loop cleanly, this block is already sorted, so just fall out and do next block
|
||||
break;
|
||||
|
||||
doqsort:
|
||||
|
||||
|
||||
doqsort:
|
||||
|
||||
// get the median into copy
|
||||
radsortgetmedian( tmp, left, right, length, element_size, is_before, swapper, mover );
|
||||
|
||||
|
||||
// if scan != left, then we have a few in order, so we can skip them all if the final is under the copy
|
||||
if ( !is_before( scan, tmp ) )
|
||||
scan = left;
|
||||
@@ -524,7 +527,7 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
// skip values below the pivot at the start of the segment
|
||||
while( is_before( scan, tmp ) ) // the pivot will stop this loop
|
||||
scan = rsadd_ptr( scan, element_size );
|
||||
|
||||
|
||||
// skip values above and equal to the pivot at the end of the segment
|
||||
rend = right;
|
||||
if ( left == start )
|
||||
@@ -545,7 +548,7 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
while( is_before( tmp, rend ) ) // the pivot will stop this loop
|
||||
rend = rsadd_ptr_elements( rend, -1 );
|
||||
}
|
||||
|
||||
|
||||
// finally, do actual partitioning nanosort style - 65-70% of the
|
||||
// total time will be in this loop, for ints, this should be
|
||||
// 4 movs, 2 cmps, 1 cmov, 2 add, 1 jmp - 10 instructions
|
||||
@@ -560,7 +563,7 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
if ( adv ) piv = rsadd_ptr( piv, element_size ); // needs to be a cmov
|
||||
scan = rsadd_ptr( scan, element_size );
|
||||
}
|
||||
|
||||
|
||||
// now move the right side to skip over all of the equal values...
|
||||
// this loop should be 5 instructions
|
||||
rightequalpiv = piv;
|
||||
@@ -570,11 +573,11 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
break;
|
||||
rightequalpiv = rsadd_ptr_elements( rightequalpiv, 1 );
|
||||
}
|
||||
|
||||
|
||||
// ok, now get the size of each half and prepare to descend
|
||||
leftlen = rsdiff_ptr_elements( piv, left );
|
||||
length -= rsdiff_ptr_elements( rightequalpiv, left );
|
||||
|
||||
|
||||
// put the smaller segment on the stack
|
||||
if ( length < leftlen )
|
||||
{
|
||||
@@ -605,3 +608,5 @@ RSFORCEINLINE void radsortinternal( void * start, size_t len, size_t element_siz
|
||||
#undef rsadd_ptr
|
||||
#undef rsadd_ptr_elements
|
||||
#undef rsdiff_ptr_elements
|
||||
|
||||
#endif // RADSORT_H
|
||||
|
||||
Reference in New Issue
Block a user