1 /*(C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 LGPL (v2 or later).*/
5 /*The fastest fallback strategy for platforms with fast multiplication appears
6 to be based on de Bruijn sequences~\cite{LP98}.
7 Tests confirmed this to be true even on an ARM11, where it is actually faster
8 than using the native clz instruction.
9 Define ILOG_NODEBRUIJN to use a simpler fallback on platforms where
10 multiplication or table lookups are too expensive.
13 author="Charles E. Leiserson and Harald Prokop",
14 title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word",
17 note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}"
19 #if !defined(ILOG_NODEBRUIJN)&& \
20 !defined(CLZ32)||!defined(CLZ64)&&LONG_MAX<9223372036854775807LL
21 static const unsigned char DEBRUIJN_IDX32[32]={
22 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
23 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9
27 int ilog32(uint32_t _v){
29 return (CLZ32_OFFS-CLZ32(_v))&-!!_v;
31 /*On a Pentium M, this branchless version tested as the fastest version without
32 multiplications on 1,000,000,000 random 32-bit integers, edging out a
33 similar version with branches, and a 256-entry LUT version.*/
34 # if defined(ILOG_NODEBRUIJN)
52 /*This de Bruijn sequence version is faster if you have a fast multiplier.*/
62 ret+=DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F];
68 int ilog64(uint64_t _v){
70 return (CLZ64_OFFS-CLZ64(_v))&-!!_v;
72 # if defined(ILOG_NODEBRUIJN)
77 m=(_v>0xFFFFFFFFU)<<5;
95 /*If we don't have a 64-bit word, split it into two 32-bit halves.*/
96 # if LONG_MAX<9223372036854775807LL
101 m=(_v>0xFFFFFFFFU)<<5;
110 ret+=DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F];
112 /*Otherwise do it in one 64-bit operation.*/
114 static const unsigned char DEBRUIJN_IDX64[64]={
115 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
116 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
117 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
118 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
129 ret+=DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F];