tdb2: make internal coalesce() function return length coalesced.

diff --git a/ccan/tdb2/free.c b/ccan/tdb2/free.c
index c78d138943b108550dbf886f4aa599e5310a1e7c..eaaeb3cf2bca594d8294ac9a25b533d45d671eea 100644
--- a/ccan/tdb2/free.c
+++ b/ccan/tdb2/free.c
@@ -333,16 +333,16 @@ static tdb_off_t ftable_offset(struct tdb_context *tdb, unsigned int ftable)
        return off;
 }
 
-/* Note: we unlock the current bucket if we coalesce or fail. */
-static tdb_bool_err coalesce(struct tdb_context *tdb,
-                            tdb_off_t off, tdb_off_t b_off,
-                            tdb_len_t data_len)
+/* Note: we unlock the current bucket if we coalesce (> 0) or fail (-ve). */
+static tdb_len_t coalesce(struct tdb_context *tdb,
+                         tdb_off_t off, tdb_off_t b_off,
+                         tdb_len_t data_len)
 {
        tdb_off_t end;
        struct tdb_free_record rec;
        enum TDB_ERROR ecode;
 
-       add_stat(tdb, alloc_coalesce_tried, 1);
+       tdb->stats.alloc_coalesce_tried++;
        end = off + sizeof(struct tdb_used_record) + data_len;
 
        while (end < tdb->file->map_size) {
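
Aside (not part of the patch): coalesce() starts with end = off + sizeof(struct tdb_used_record) + data_len, i.e. just past the current record's data, and pushes end forward over each adjacent free record it manages to lock and merge. The sketch below shows how a caller is expected to read the new return value; it is illustrative only, mirroring the lock_and_alloc() change further down in this patch:

        tdb_len_t coal = coalesce(tdb, off, b_off, data_len);
        if (TDB_OFF_IS_ERR(coal)) {
                /* Error: coalesce() has already dropped the bucket lock. */
                return coal;
        } else if (coal > 0) {
                /* Merged "coal" usable bytes; the bucket lock was dropped,
                 * so the free-list scan must restart. */
        } else {
                /* coal == 0: nothing adjacent was free; lock still held. */
        }
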
@@ -376,7 +376,7 @@ static tdb_bool_err coalesce(struct tdb_context *tdb,
                /* We may be violating lock order here, so best effort. */
                if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT)
                    != TDB_SUCCESS) {
-                       add_stat(tdb, alloc_coalesce_lockfail, 1);
+                       tdb->stats.alloc_coalesce_lockfail++;
                        break;
                }
 
@@ -388,14 +388,14 @@ static tdb_bool_err coalesce(struct tdb_context *tdb,
                }
 
                if (unlikely(frec_magic(&rec) != TDB_FREE_MAGIC)) {
-                       add_stat(tdb, alloc_coalesce_race, 1);
+                       tdb->stats.alloc_coalesce_race++;
                        tdb_unlock_free_bucket(tdb, nb_off);
                        break;
                }
 
                if (unlikely(frec_ftable(&rec) != ftable)
                    || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) {
-                       add_stat(tdb, alloc_coalesce_race, 1);
+                       tdb->stats.alloc_coalesce_race++;
                        tdb_unlock_free_bucket(tdb, nb_off);
                        break;
                }
@@ -409,12 +409,12 @@ static tdb_bool_err coalesce(struct tdb_context *tdb,
 
                end += sizeof(struct tdb_used_record) + frec_len(&rec);
                tdb_unlock_free_bucket(tdb, nb_off);
-               add_stat(tdb, alloc_coalesce_num_merged, 1);
+               tdb->stats.alloc_coalesce_num_merged++;
        }
 
        /* Didn't find any adjacent free? */
        if (end == off + sizeof(struct tdb_used_record) + data_len)
-               return false;
+               return 0;
 
        /* OK, expand initial record */
        ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
@@ -446,14 +446,15 @@ static tdb_bool_err coalesce(struct tdb_context *tdb,
                goto err;
        }
 
-       add_stat(tdb, alloc_coalesce_succeeded, 1);
+       tdb->stats.alloc_coalesce_succeeded++;
        tdb_unlock_free_bucket(tdb, b_off);
 
        ecode = add_free_record(tdb, off, end - off);
        if (ecode != TDB_SUCCESS) {
                return ecode;
        }
-       return true;
+       /* Return usable length. */
+       return end - off - sizeof(struct tdb_used_record);
 
 err:
        /* To unify error paths, we *always* unlock bucket on error. */
@@ -476,7 +477,7 @@ static tdb_off_t lock_and_alloc(struct tdb_context *tdb,
        size_t size = adjust_size(keylen, datalen);
        enum TDB_ERROR ecode;
 
-       add_stat(tdb, allocs, 1);
+       tdb->stats.allocs++;
 again:
        b_off = bucket_off(ftable_off, bucket);
 
@@ -506,9 +507,8 @@ again:
 
        while (off) {
                const struct tdb_free_record *r;
-               tdb_len_t len;
+               tdb_len_t len, coal;
                tdb_off_t next;
-               int coal;
 
                r = tdb_access_read(tdb, off, sizeof(*r), true);
                if (TDB_PTR_IS_ERR(r)) {
@@ -544,14 +544,14 @@ again:
 
                /* Since we're going slow anyway, try coalescing here. */
                coal = coalesce(tdb, off, b_off, len);
-               if (coal == 1) {
-                       /* This has unlocked list, restart. */
-                       goto again;
-               }
-               if (coal < 0) {
+               if (TDB_OFF_IS_ERR(coal)) {
                        /* This has already unlocked on error. */
                        return coal;
                }
+               if (coal > 0) {
+                       /* This has unlocked list, restart. */
+                       goto again;
+               }
                off = next;
        }
 
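
Aside (not part of the patch): since coal is now a tdb_len_t rather than an int, the old "coal < 0" test no longer applies; errors are presumably encoded as out-of-range values, which is why the caller above switches to TDB_OFF_IS_ERR(coal) and treats any other non-zero value as "something was coalesced, the list was unlocked, restart the scan".
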
@@ -596,7 +596,7 @@ again:
                /* Bucket of leftover will be <= current bucket, so nested
                 * locking is allowed. */
                if (leftover) {
-                       add_stat(tdb, alloc_leftover, 1);
+                       tdb->stats.alloc_leftover++;
                        ecode = add_free_record(tdb,
                                                best_off + sizeof(rec)
                                                + frec_len(&best) - leftover,
@@ -649,9 +649,9 @@ static tdb_off_t get_free(struct tdb_context *tdb,
                                return off;
                        if (off != 0) {
                                if (b == start_b)
-                                       add_stat(tdb, alloc_bucket_exact, 1);
+                                       tdb->stats.alloc_bucket_exact++;
                                if (b == TDB_FREE_BUCKETS - 1)
-                                       add_stat(tdb, alloc_bucket_max, 1);
+                                       tdb->stats.alloc_bucket_max++;
                                /* Worked?  Stay using this list. */
                                tdb->ftable_off = ftable_off;
                                tdb->ftable = ftable;
@@ -713,13 +713,10 @@ enum TDB_ERROR set_header(struct tdb_context *tdb,
 /* Expand the database. */
 static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
 {
-       uint64_t old_size;
+       uint64_t old_size, rec_size, map_size;
        tdb_len_t wanted;
        enum TDB_ERROR ecode;
 
-       /* We need room for the record header too. */
-       wanted = sizeof(struct tdb_used_record) + size;
-
        /* Need to hold a hash lock to expand DB: transactions rely on it. */
        if (!(tdb->flags & TDB_NOLOCK)
            && !tdb->file->allrecord_lock.count && !tdb_has_hash_locks(tdb)) {
@@ -727,14 +724,6 @@ static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
                                  "tdb_expand: must hold lock during expand");
        }
 
-       /* always make room for at least 100 more records, and at
-           least 25% more space. */
-       if (size * TDB_EXTENSION_FACTOR > tdb->file->map_size / 4)
-               wanted = size * TDB_EXTENSION_FACTOR;
-       else
-               wanted = tdb->file->map_size / 4;
-       wanted = adjust_size(0, wanted);
-
        /* Only one person can expand file at a time. */
        ecode = tdb_lock_expand(tdb, F_WRLCK);
        if (ecode != TDB_SUCCESS) {
@@ -749,6 +738,32 @@ static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
                return TDB_SUCCESS;
        }
 
+       /* Limit size in order to avoid using up huge amounts of memory for
+        * in-memory tdbs if an oddball huge record creeps in. */
+       if (size > 100 * 1024) {
+               rec_size = size * 2;
+       } else {
+               rec_size = size * 100;
+       }
+
+       /* Always make room for at least rec_size more bytes, and grow by
+          at least 25% of the current size while the DB is smaller than
+          100MiB; above that, grow it by only 10%. */
+       if (old_size > 100 * 1024 * 1024) {
+               map_size = old_size / 10;
+       } else {
+               map_size = old_size / 4;
+       }
+
+       if (map_size > rec_size) {
+               wanted = map_size;
+       } else {
+               wanted = rec_size;
+       }
+
+       /* We need room for the record header too. */
+       wanted = adjust_size(0, sizeof(struct tdb_used_record) + wanted);
+
        ecode = tdb->methods->expand_file(tdb, wanted);
        if (ecode != TDB_SUCCESS) {
                tdb_unlock_expand(tdb, F_WRLCK);
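
Aside (not part of the patch): the new sizing policy reads naturally as a small pure function. The sketch below restates it; the helper name grow_hint() and the worked numbers are illustrative only, and as in the code above the tdb_used_record header and adjust_size() rounding are applied on top of the value it returns:

        #include <stdint.h>

        /* Illustrative restatement of the growth heuristic above: "size" is
         * the record size that triggered the expansion, "old_size" is the
         * current file size. */
        static uint64_t grow_hint(uint64_t size, uint64_t old_size)
        {
                uint64_t rec_size, map_size;

                /* Huge records: just double the request; otherwise leave
                 * room for roughly 100 records of this size. */
                if (size > 100 * 1024)
                        rec_size = size * 2;
                else
                        rec_size = size * 100;

                /* Grow by 25% of the file while it is under 100MiB,
                 * and by 10% once it is larger. */
                if (old_size > 100 * 1024 * 1024)
                        map_size = old_size / 10;
                else
                        map_size = old_size / 4;

                return map_size > rec_size ? map_size : rec_size;
        }

        /* Example: size = 1 KiB, old_size = 10 MiB:
         *   rec_size = 1 KiB * 100            = 100 KiB
         *   map_size = 10 MiB / 4             = 2.5 MiB
         *   grow_hint()                       = 2.5 MiB
         * so the file grows by about 2.5 MiB plus the record header,
         * rounded up by adjust_size(). */
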
@@ -758,7 +773,7 @@ static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
        /* We need to drop this lock before adding free record. */
        tdb_unlock_expand(tdb, F_WRLCK);
 
-       add_stat(tdb, expands, 1);
+       tdb->stats.expands++;
        return add_free_record(tdb, old_size, wanted);
 }