From a5f9a8fbcea19f50aa3594ce2dbf5a13c5455ecc Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Mar 2009 08:27:56 +1030
Subject: [PATCH] Tim's ilog module.

---
 ccan/ilog/_info.c    |  45 +++++++++++++
 ccan/ilog/ilog.c     | 134 +++++++++++++++++++++++++++++++++++++++
 ccan/ilog/ilog.h     | 147 +++++++++++++++++++++++++++++++++++++++++++
 ccan/ilog/test/run.c |  90 ++++++++++++++++++++++++++
 4 files changed, 416 insertions(+)
 create mode 100644 ccan/ilog/_info.c
 create mode 100644 ccan/ilog/ilog.c
 create mode 100644 ccan/ilog/ilog.h
 create mode 100644 ccan/ilog/test/run.c

diff --git a/ccan/ilog/_info.c b/ccan/ilog/_info.c
new file mode 100644
index 00000000..2b4aeb2e
--- /dev/null
+++ b/ccan/ilog/_info.c
@@ -0,0 +1,45 @@
+/**
+ * ilog - Integer logarithm.
+ *
+ * ILOG_32() and ILOG_64() compute the minimum number of bits required to store
+ *  an unsigned 32-bit or 64-bit value without any leading zero bits.
+ * This can also be thought of as the location of the highest set bit, with
+ *  counting starting from one (so that 0 returns 0, 1 returns 1, and 2**31
+ *  returns 32).
+ * When the value is known to be non-zero ILOGNZ_32() and ILOGNZ_64() can
+ *  compile into as few as two instructions, one of which may get optimized out
+ *  later.
+ * STATIC_ILOG_32 and STATIC_ILOG_64 allow computation on compile-time
+ *  constants, so other compile-time constants can be derived from them.
+ *
+ * Example:
+ *  #include <stdio.h>
+ *  #include <limits.h>
+ *  #include <ccan/ilog/ilog.h>
+ *
+ *  int main(void){
+ *    int i;
+ *    printf("ILOG_32(0x%08X)=%i\n",0,ILOG_32(0));
+ *    for(i=1;i<=STATIC_ILOG_32(USHRT_MAX);i++){
+ *      uint32_t v;
+ *      v=(uint32_t)1U<<i-1;
+ *      //Here we know v is non-zero, so we can use ILOGNZ_32().
+ *      printf("ILOG_32(0x%08X)=%i\n",v,ILOGNZ_32(v));
+ *    }
+ *    return 0;
+ *  }
+ *
+ * License: LGPL (v2 or later)
+ */
+#include <string.h>
+#include "config.h"
+
+int main(int _argc,const char *_argv[]){
+  /*Expect exactly one argument.*/
+  if(_argc!=2)return 1;
+  if(strcmp(_argv[1],"depends")==0){
+    /*PRINTF-CCAN-PACKAGES-YOU-NEED-ONE-PER-LINE-IF-ANY*/
+    return 0;
+  }
+  return 1;
+}
diff --git a/ccan/ilog/ilog.c b/ccan/ilog/ilog.c
new file mode 100644
index 00000000..ed39d192
--- /dev/null
+++ b/ccan/ilog/ilog.c
@@ -0,0 +1,134 @@
+/*(C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 LGPL (v2 or later).*/
+#include "ilog.h"
+#include <limits.h>
+
+/*The fastest fallback strategy for platforms with fast multiplication appears
+   to be based on de Bruijn sequences~\cite{LP98}.
+  Tests confirmed this to be true even on an ARM11, where it is actually faster
+   than using the native clz instruction.
+  Define ILOG_NODEBRUIJN to use a simpler fallback on platforms where
+   multiplication or table lookups are too expensive.
+
+  @UNPUBLISHED{LP98,
+    author="Charles E. Leiserson and Harald Prokop",
+    title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word",
+    month=Jun,
+    year=1998,
+    note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}"
+  }*/
+#if !defined(ILOG_NODEBRUIJN)&& \
+ !defined(CLZ32)||!defined(CLZ64)&&LONG_MAX<9223372036854775807LL
+static const unsigned char DEBRUIJN_IDX32[32]={
+   0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
+  31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9
+};
+#endif
+
+int ilog32(uint32_t _v){
+#if defined(CLZ32)
+  return (CLZ32_OFFS-CLZ32(_v))&-!!_v;
+#else
+/*On a Pentium M, this branchless version tested as the fastest version without
+   multiplications on 1,000,000,000 random 32-bit integers, edging out a
+   similar version with branches, and a 256-entry LUT version.*/
+# if defined(ILOG_NODEBRUIJN)
+  int ret;
+  int m;
+  ret=_v>0;
+  m=(_v>0xFFFFU)<<4;
+  _v>>=m;
+  ret|=m;
+  m=(_v>0xFFU)<<3;
+  _v>>=m;
+  ret|=m;
+  m=(_v>0xFU)<<2;
+  _v>>=m;
+  ret|=m;
+  m=(_v>3)<<1;
+  _v>>=m;
+  ret|=m;
+  ret+=_v>1;
+  return ret;
+/*This de Bruijn sequence version is faster if you have a fast multiplier.*/
+# else
+  int ret;
+  ret=_v>0;
+  _v|=_v>>1;
+  _v|=_v>>2;
+  _v|=_v>>4;
+  _v|=_v>>8;
+  _v|=_v>>16;
+  _v=(_v>>1)+1;
+  ret+=DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F];
+  return ret;
+# endif
+#endif
+}
+
+int ilog64(uint64_t _v){
+#if defined(CLZ64)
+  return (CLZ64_OFFS-CLZ64(_v))&-!!_v;
+#else
+# if defined(ILOG_NODEBRUIJN)
+  uint32_t v;
+  int      ret;
+  int      m;
+  ret=_v>0;
+  m=(_v>0xFFFFFFFFU)<<5;
+  v=(uint32_t)(_v>>m);
+  ret|=m;
+  m=(v>0xFFFFU)<<4;
+  v>>=m;
+  ret|=m;
+  m=(v>0xFFU)<<3;
+  v>>=m;
+  ret|=m;
+  m=(v>0xFU)<<2;
+  v>>=m;
+  ret|=m;
+  m=(v>3)<<1;
+  v>>=m;
+  ret|=m;
+  ret+=v>1;
+  return ret;
+# else
+/*If we don't have a 64-bit word, split it into two 32-bit halves.*/
+#  if LONG_MAX<9223372036854775807LL
+  uint32_t v;
+  int      ret;
+  int      m;
+  ret=_v>0;
+  m=(_v>0xFFFFFFFFU)<<5;
+  v=(uint32_t)(_v>>m);
+  ret|=m;
+  v|=v>>1;
+  v|=v>>2;
+  v|=v>>4;
+  v|=v>>8;
+  v|=v>>16;
+  v=(v>>1)+1;
+  ret+=DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F];
+  return ret;
+/*Otherwise do it in one 64-bit operation.*/
+#  else
+  static const unsigned char DEBRUIJN_IDX64[64]={
+     0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
+     5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
+    63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
+    62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
+  };
+  int ret;
+  ret=_v>0;
+  _v|=_v>>1;
+  _v|=_v>>2;
+  _v|=_v>>4;
+  _v|=_v>>8;
+  _v|=_v>>16;
+  _v|=_v>>32;
+  _v=(_v>>1)+1;
+  ret+=DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F];
+  return ret;
+#  endif
+# endif
+#endif
+}
diff --git a/ccan/ilog/ilog.h b/ccan/ilog/ilog.h
new file mode 100644
index 00000000..f74a3507
--- /dev/null
+++ b/ccan/ilog/ilog.h
@@ -0,0 +1,147 @@
+#if !defined(_ilog_H)
+# define _ilog_H (1)
+# include <stdint.h>
+
+# ifdef __GNUC_PREREQ
+/*Tag our functions as idempotent to aid optimization, if possible.*/
+#  if __GNUC_PREREQ(2,5)
+#   define IDEMPOTENT __attribute__((const))
+#  endif
+#  if __GNUC_PREREQ(3,4)
+#   include <limits.h>
+/*Note the casts to (int) below: this prevents CLZ{32|64}_OFFS from "upgrading"
+   the type of an entire expression to an (unsigned) size_t.*/
+#   if INT_MAX>=2147483647
+#    define CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
+#    define CLZ32(_x) (__builtin_clz(_x))
+#   elif LONG_MAX>=2147483647L
+#    define CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
+#    define CLZ32(_x) (__builtin_clzl(_x))
+#   endif
+#   if INT_MAX>=9223372036854775807LL
+#    define CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
+#    define CLZ64(_x) (__builtin_clz(_x))
+#   elif LONG_MAX>=9223372036854775807LL
+#    define CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
+#    define CLZ64(_x) (__builtin_clzl(_x))
+#   elif LLONG_MAX>=9223372036854775807LL
+#    define CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT)
+#    define CLZ64(_x) (__builtin_clzll(_x))
+#   endif
+#  endif
+# endif
+
+/*If you have some other compiler which defines its own clz-style builtin,
+   implement a check for it here.*/
+
+# if !defined(IDEMPOTENT)
+#  define IDEMPOTENT
+# endif
+
+
+
+/**
+ * ilog32 - Integer binary logarithm of a 32-bit value.
+ * @_v: A 32-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * The ILOG_32() or ILOGNZ_32() macros may be able to use a builtin function
+ *  instead, which should be faster.
+ */
+int ilog32(uint32_t _v)IDEMPOTENT;
+/**
+ * ilog64 - Integer binary logarithm of a 64-bit value.
+ * @_v: A 64-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * The ILOG_64() or ILOGNZ_64() macros may be able to use a builtin function
+ *  instead, which should be faster.
+ */
+int ilog64(uint64_t _v)IDEMPOTENT;
+
+# undef IDEMPOTENT
+
+
+# if defined(CLZ32)
+/**
+ * ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value.
+ * @_v: A non-zero 32-bit value.
+ * Returns floor(log2(_v))+1.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * If _v is zero, the return value is undefined; use ILOG_32() instead.
+ */
+#  define ILOGNZ_32(_v) (CLZ32_OFFS-CLZ32(_v))
+/**
+ * ILOG_32 - Integer binary logarithm of a 32-bit value.
+ * @_v: A 32-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ */
+#  define ILOG_32(_v)   (ILOGNZ_32(_v)&-!!(_v))
+# else
+#  define ILOGNZ_32(_v) (ilog32(_v))
+#  define ILOG_32(_v)   (ilog32(_v))
+# endif
+
+# if defined(CLZ64)
+/**
+ * ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value.
+ * @_v: A non-zero 64-bit value.
+ * Returns floor(log2(_v))+1.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * If _v is zero, the return value is undefined; use ILOG_64() instead.
+ */
+#  define ILOGNZ_64(_v) (CLZ64_OFFS-CLZ64(_v))
+/**
+ * ILOG_64 - Integer binary logarithm of a 64-bit value.
+ * @_v: A 64-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ */
+#  define ILOG_64(_v)   (ILOGNZ_64(_v)&-!!(_v))
+# else
+#  define ILOGNZ_64(_v) (ilog64(_v))
+#  define ILOG_64(_v)   (ilog64(_v))
+# endif
+
+# define STATIC_ILOG0(_v) (!!(_v))
+# define STATIC_ILOG1(_v) (((_v)&0x2)?2:STATIC_ILOG0(_v))
+# define STATIC_ILOG2(_v) (((_v)&0xC)?2+STATIC_ILOG1((_v)>>2):STATIC_ILOG1(_v))
+# define STATIC_ILOG3(_v) \
+ (((_v)&0xF0)?4+STATIC_ILOG2((_v)>>4):STATIC_ILOG2(_v))
+# define STATIC_ILOG4(_v) \
+ (((_v)&0xFF00)?8+STATIC_ILOG3((_v)>>8):STATIC_ILOG3(_v))
+# define STATIC_ILOG5(_v) \
+ (((_v)&0xFFFF0000)?16+STATIC_ILOG4((_v)>>16):STATIC_ILOG4(_v))
+# define STATIC_ILOG6(_v) \
+ (((_v)&0xFFFFFFFF00000000ULL)?32+STATIC_ILOG5((_v)>>32):STATIC_ILOG5(_v))
+/**
+ * STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant.
+ * @_v: A non-negative 32-bit constant.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * This macro is suitable for evaluation at compile time, but it should not be
+ *  used on values that can change at runtime, as it operates via exhaustive
+ *  search.
+ */
+# define STATIC_ILOG_32(_v) (STATIC_ILOG5((uint32_t)(_v)))
+/**
+ * STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant.
+ * @_v: A non-negative 64-bit constant.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * This macro is suitable for evaluation at compile time, but it should not be
+ *  used on values that can change at runtime, as it operates via exhaustive
+ *  search.
+ */
+# define STATIC_ILOG_64(_v) (STATIC_ILOG6((uint64_t)(_v)))
+
+#endif
diff --git a/ccan/ilog/test/run.c b/ccan/ilog/test/run.c
new file mode 100644
index 00000000..81935840
--- /dev/null
+++ b/ccan/ilog/test/run.c
@@ -0,0 +1,90 @@
+#include <stdio.h>
+#include "ilog/ilog.h"
+#include "tap/tap.h"
+#if defined(__GNUC_PREREQ)
+# if __GNUC_PREREQ(4,2)
+#  pragma GCC diagnostic ignored "-Wparentheses"
+# endif
+#endif
+
+/*Dead simple (but slow) versions to compare against.*/
+
+static int test_ilog32(uint32_t _v){
+  int ret;
+  for(ret=0;_v;ret++)_v>>=1;
+  return ret;
+}
+
+static int test_ilog64(uint64_t _v){
+  int ret;
+  for(ret=0;_v;ret++)_v>>=1;
+  return ret;
+}
+
+#define NTRIALS (64)
+
+int main(int _argc,const char *_argv[]){
+  int nmatches;
+  int i;
+  int j;
+  /*This is how many tests you plan to run.*/
+  plan_tests(2);
+  nmatches=0;
+  for(i=0;i<=32;i++){
+    uint32_t v;
+    /*Test each bit in turn (and 0).*/
+    v=i?(uint32_t)1U<<i-1:0;
+    for(j=0;j<NTRIALS;j++){
+      int l;
+      l=test_ilog32(v);
+      if(ILOG_32(v)!=l){
+        fprintf(stderr,"ILOG_32(0x%08lX): %i != %i\n",(long)v,ILOG_32(v),l);
+      }
+      else nmatches++;
+      if(ilog32(v)!=l){
+        fprintf(stderr,"ilog32(0x%08lX): %i != %i\n",(long)v,ilog32(v),l);
+      }
+      else nmatches++;
+      if(STATIC_ILOG_32(v)!=l){
+        fprintf(stderr,"STATIC_ILOG_32(0x%08lX): %i != %i\n",
+         (long)v,STATIC_ILOG_32(v),l);
+      }
+      else nmatches++;
+      /*Also try a few more pseudo-random values with at most the same number
+         of bits.*/
+      v=1103515245U*v+12345U&0xFFFFFFFFU>>(33-i>>1)>>(32-i>>1);
+    }
+  }
+  ok1(nmatches==3*(32+1)*NTRIALS);
+  nmatches=0;
+  for(i=0;i<=64;i++){
+    uint64_t v;
+    /*Test each bit in turn (and 0).*/
+    v=i?(uint64_t)1U<<i-1:0;
+    for(j=0;j<NTRIALS;j++){
+      int l;
+      l=test_ilog64(v);
+      if(ILOG_64(v)!=l){
+        fprintf(stderr,"ILOG_64(0x%016llX): %i != %i\n",
+         (long long)v,ILOG_64(v),l);
+      }
+      else nmatches++;
+      if(ilog64(v)!=l){
+        fprintf(stderr,"ilog64(0x%016llX): %i != %i\n",
+         (long long)v,ilog64(v),l);
+      }
+      else nmatches++;
+      if(STATIC_ILOG_64(v)!=l){
+        fprintf(stderr,"STATIC_ILOG_64(0x%016llX): %i != %i\n",
+         (long long)v,STATIC_ILOG_64(v),l);
+      }
+      else nmatches++;
+      /*Also try a few more pseudo-random values with at most the same number
+         of bits.*/
+      v=(uint64_t)(2862933555777941757ULL*v+3037000493ULL
+       &0xFFFFFFFFFFFFFFFFULL>>(65-i>>1)>>(64-i>>1));
+    }
+  }
+  ok1(nmatches==3*(64+1)*NTRIALS);
+  return exit_status();
+}
-- 
2.39.2