From a5f9a8fbcea19f50aa3594ce2dbf5a13c5455ecc Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Mar 2009 08:27:56 +1030 Subject: [PATCH] Tim's ilog module. --- ccan/ilog/_info.c | 45 +++++++++++++ ccan/ilog/ilog.c | 134 +++++++++++++++++++++++++++++++++++++++ ccan/ilog/ilog.h | 147 +++++++++++++++++++++++++++++++++++++++++++ ccan/ilog/test/run.c | 90 ++++++++++++++++++++++++++ 4 files changed, 416 insertions(+) create mode 100644 ccan/ilog/_info.c create mode 100644 ccan/ilog/ilog.c create mode 100644 ccan/ilog/ilog.h create mode 100644 ccan/ilog/test/run.c diff --git a/ccan/ilog/_info.c b/ccan/ilog/_info.c new file mode 100644 index 00000000..2b4aeb2e --- /dev/null +++ b/ccan/ilog/_info.c @@ -0,0 +1,45 @@ +/** + * ilog - Integer logarithm. + * + * ILOG_32() and ILOG_64() compute the minimum number of bits required to store + * an unsigned 32-bit or 64-bit value without any leading zero bits. + * This can also be thought of as the location of the highest set bit, with + * counting starting from one (so that 0 returns 0, 1 returns 1, and 2**31 + * returns 32). + * When the value is known to be non-zero ILOGNZ_32() and ILOGNZ_64() can + * compile into as few as two instructions, one of which may get optimized out + * later. + * STATIC_ILOG_32 and STATIC_ILOG_64 allow computation on compile-time + * constants, so other compile-time constants can be derived from them. + * + * Example: + * #include + * #include + * #include + * + * int main(void){ + * int i; + * printf("ILOG_32(0x%08X)=%i\n",0,ILOG_32(0)); + * for(i=1;i<=STATIC_ILOG_32(USHRT_MAX);i++){ + * uint32_t v; + * v=(uint32_t)1U< +#include "config.h" + +int main(int _argc,const char *_argv[]){ + /*Expect exactly one argument.*/ + if(_argc!=2)return 1; + if(strcmp(_argv[1],"depends")==0){ + /*PRINTF-CCAN-PACKAGES-YOU-NEED-ONE-PER-LINE-IF-ANY*/ + return 0; + } + return 1; +} diff --git a/ccan/ilog/ilog.c b/ccan/ilog/ilog.c new file mode 100644 index 00000000..ed39d192 --- /dev/null +++ b/ccan/ilog/ilog.c @@ -0,0 +1,134 @@ +/*(C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 LGPL (v2 or later).*/ +#include "ilog.h" +#include + +/*The fastest fallback strategy for platforms with fast multiplication appears + to be based on de Bruijn sequences~\cite{LP98}. + Tests confirmed this to be true even on an ARM11, where it is actually faster + than using the native clz instruction. + Define ILOG_NODEBRUIJN to use a simpler fallback on platforms where + multiplication or table lookups are too expensive. + + @UNPUBLISHED{LP98, + author="Charles E. Leiserson and Harald Prokop", + title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word", + month=Jun, + year=1998, + note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}" + }*/ +#if !defined(ILOG_NODEBRUIJN)&& \ + !defined(CLZ32)||!defined(CLZ64)&&LONG_MAX<9223372036854775807LL +static const unsigned char DEBRUIJN_IDX32[32]={ + 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8, + 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9 +}; +#endif + +int ilog32(uint32_t _v){ +#if defined(CLZ32) + return (CLZ32_OFFS-CLZ32(_v))&-!!_v; +#else +/*On a Pentium M, this branchless version tested as the fastest version without + multiplications on 1,000,000,000 random 32-bit integers, edging out a + similar version with branches, and a 256-entry LUT version.*/ +# if defined(ILOG_NODEBRUIJN) + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFU)<<4; + _v>>=m; + ret|=m; + m=(_v>0xFFU)<<3; + _v>>=m; + ret|=m; + m=(_v>0xFU)<<2; + _v>>=m; + ret|=m; + m=(_v>3)<<1; + _v>>=m; + ret|=m; + ret+=_v>1; + return ret; +/*This de Bruijn sequence version is faster if you have a fast multiplier.*/ +# else + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v=(_v>>1)+1; + ret+=DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F]; + return ret; +# endif +#endif +} + +int ilog64(uint64_t _v){ +#if defined(CLZ64) + return (CLZ64_OFFS-CLZ64(_v))&-!!_v; +#else +# if defined(ILOG_NODEBRUIJN) + uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(uint32_t)(_v>>m); + ret|=m; + m=(v>0xFFFFU)<<4; + v>>=m; + ret|=m; + m=(v>0xFFU)<<3; + v>>=m; + ret|=m; + m=(v>0xFU)<<2; + v>>=m; + ret|=m; + m=(v>3)<<1; + v>>=m; + ret|=m; + ret+=v>1; + return ret; +# else +/*If we don't have a 64-bit word, split it into two 32-bit halves.*/ +# if LONG_MAX<9223372036854775807LL + uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(uint32_t)(_v>>m); + ret|=m; + v|=v>>1; + v|=v>>2; + v|=v>>4; + v|=v>>8; + v|=v>>16; + v=(v>>1)+1; + ret+=DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F]; + return ret; +/*Otherwise do it in one 64-bit operation.*/ +# else + static const unsigned char DEBRUIJN_IDX64[64]={ + 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40, + 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, + 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56, + 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 + }; + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v|=_v>>32; + _v=(_v>>1)+1; + ret+=DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F]; + return ret; +# endif +# endif +#endif +} diff --git a/ccan/ilog/ilog.h b/ccan/ilog/ilog.h new file mode 100644 index 00000000..f74a3507 --- /dev/null +++ b/ccan/ilog/ilog.h @@ -0,0 +1,147 @@ +#if !defined(_ilog_H) +# define _ilog_H (1) +# include + +# ifdef __GNUC_PREREQ +/*Tag our functions as idempotent to aid optimization, if possible.*/ +# if __GNUC_PREREQ(2,5) +# define IDEMPOTENT __attribute__((const)) +# endif +# if __GNUC_PREREQ(3,4) +# include +/*Note the casts to (int) below: this prevents CLZ{32|64}_OFFS from "upgrading" + the type of an entire expression to an (unsigned) size_t.*/ +# if INT_MAX>=2147483647 +# define CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT) +# define CLZ32(_x) (__builtin_clz(_x)) +# elif LONG_MAX>=2147483647L +# define CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT) +# define CLZ32(_x) (__builtin_clzl(_x)) +# endif +# if INT_MAX>=9223372036854775807LL +# define CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT) +# define CLZ64(_x) (__builtin_clz(_x)) +# elif LONG_MAX>=9223372036854775807LL +# define CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT) +# define CLZ64(_x) (__builtin_clzl(_x)) +# elif LLONG_MAX>=9223372036854775807LL +# define CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT) +# define CLZ64(_x) (__builtin_clzll(_x)) +# endif +# endif +# endif + +/*If you have some other compiler which defines its own clz-style builtin, + implement a check for it here.*/ + +# if !defined(IDEMPOTENT) +# define IDEMPOTENT +# endif + + + +/** + * ilog32 - Integer binary logarithm of a 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * The ILOG_32() or ILOGNZ_32() macros may be able to use a builtin function + * instead, which should be faster. + */ +int ilog32(uint32_t _v)IDEMPOTENT; +/** + * ilog64 - Integer binary logarithm of a 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * The ILOG_64() or ILOGNZ_64() macros may be able to use a builtin function + * instead, which should be faster. + */ +int ilog64(uint64_t _v)IDEMPOTENT; + +# undef IDEMPOTENT + + +# if defined(CLZ32) +/** + * ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value. + * @_v: A non-zero 32-bit value. + * Returns floor(log2(_v))+1. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * If _v is zero, the return value is undefined; use ILOG_32() instead. + */ +# define ILOGNZ_32(_v) (CLZ32_OFFS-CLZ32(_v)) +/** + * ILOG_32 - Integer binary logarithm of a 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + */ +# define ILOG_32(_v) (ILOGNZ_32(_v)&-!!(_v)) +# else +# define ILOGNZ_32(_v) (ilog32(_v)) +# define ILOG_32(_v) (ilog32(_v)) +# endif + +# if defined(CLZ64) +/** + * ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value. + * @_v: A non-zero 64-bit value. + * Returns floor(log2(_v))+1. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * If _v is zero, the return value is undefined; use ILOG_64() instead. + */ +# define ILOGNZ_64(_v) (CLZ64_OFFS-CLZ64(_v)) +/** + * ILOG_64 - Integer binary logarithm of a 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + */ +# define ILOG_64(_v) (ILOGNZ_64(_v)&-!!(_v)) +# else +# define ILOGNZ_64(_v) (ilog64(_v)) +# define ILOG_64(_v) (ilog64(_v)) +# endif + +# define STATIC_ILOG0(_v) (!!(_v)) +# define STATIC_ILOG1(_v) (((_v)&0x2)?2:STATIC_ILOG0(_v)) +# define STATIC_ILOG2(_v) (((_v)&0xC)?2+STATIC_ILOG1((_v)>>2):STATIC_ILOG1(_v)) +# define STATIC_ILOG3(_v) \ + (((_v)&0xF0)?4+STATIC_ILOG2((_v)>>4):STATIC_ILOG2(_v)) +# define STATIC_ILOG4(_v) \ + (((_v)&0xFF00)?8+STATIC_ILOG3((_v)>>8):STATIC_ILOG3(_v)) +# define STATIC_ILOG5(_v) \ + (((_v)&0xFFFF0000)?16+STATIC_ILOG4((_v)>>16):STATIC_ILOG4(_v)) +# define STATIC_ILOG6(_v) \ + (((_v)&0xFFFFFFFF00000000ULL)?32+STATIC_ILOG5((_v)>>32):STATIC_ILOG5(_v)) +/** + * STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant. + * @_v: A non-negative 32-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * This macro is suitable for evaluation at compile time, but it should not be + * used on values that can change at runtime, as it operates via exhaustive + * search. + */ +# define STATIC_ILOG_32(_v) (STATIC_ILOG5((uint32_t)(_v))) +/** + * STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant. + * @_v: A non-negative 64-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * This macro is suitable for evaluation at compile time, but it should not be + * used on values that can change at runtime, as it operates via exhaustive + * search. + */ +# define STATIC_ILOG_64(_v) (STATIC_ILOG6((uint64_t)(_v))) + +#endif diff --git a/ccan/ilog/test/run.c b/ccan/ilog/test/run.c new file mode 100644 index 00000000..81935840 --- /dev/null +++ b/ccan/ilog/test/run.c @@ -0,0 +1,90 @@ +#include +#include "ilog/ilog.h" +#include "tap/tap.h" +#if defined(__GNUC_PREREQ) +# if __GNUC_PREREQ(4,2) +# pragma GCC diagnostic ignored "-Wparentheses" +# endif +#endif + +/*Dead simple (but slow) versions to compare against.*/ + +static int test_ilog32(uint32_t _v){ + int ret; + for(ret=0;_v;ret++)_v>>=1; + return ret; +} + +static int test_ilog64(uint64_t _v){ + int ret; + for(ret=0;_v;ret++)_v>>=1; + return ret; +} + +#define NTRIALS (64) + +int main(int _argc,const char *_argv[]){ + int nmatches; + int i; + int j; + /*This is how many tests you plan to run.*/ + plan_tests(2); + nmatches=0; + for(i=0;i<=32;i++){ + uint32_t v; + /*Test each bit in turn (and 0).*/ + v=i?(uint32_t)1U<>(33-i>>1)>>(32-i>>1); + } + } + ok1(nmatches==3*(32+1)*NTRIALS); + nmatches=0; + for(i=0;i<=64;i++){ + uint64_t v; + /*Test each bit in turn (and 0).*/ + v=i?(uint64_t)1U<>(65-i>>1)>>(64-i>>1)); + } + } + ok1(nmatches==3*(64+1)*NTRIALS); + return exit_status(); +} -- 2.39.2