tdb2: make internal coalesce() function return length coalesced.

diff --git a/ccan/tdb2/free.c b/ccan/tdb2/free.c
index c78d138943b108550dbf886f4aa599e5310a1e7c..eaaeb3cf2bca594d8294ac9a25b533d45d671eea 100644
--- a/ccan/tdb2/free.c
+++ b/ccan/tdb2/free.c
@@ -333,16 +333,16 @@ static tdb_off_t ftable_offset(struct tdb_context *tdb, unsigned int ftable)
        return off;
 }
 
-/* Note: we unlock the current bucket if we coalesce or fail. */
-static tdb_bool_err coalesce(struct tdb_context *tdb,
-                            tdb_off_t off, tdb_off_t b_off,
-                            tdb_len_t data_len)
+/* Note: we unlock the current bucket if we coalesce (> 0) or fail (-ve). */
+static tdb_len_t coalesce(struct tdb_context *tdb,
+                         tdb_off_t off, tdb_off_t b_off,
+                         tdb_len_t data_len)
 {
        tdb_off_t end;
        struct tdb_free_record rec;
        enum TDB_ERROR ecode;
 
-       add_stat(tdb, alloc_coalesce_tried, 1);
+       tdb->stats.alloc_coalesce_tried++;
        end = off + sizeof(struct tdb_used_record) + data_len;
 
        while (end < tdb->file->map_size) {
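
Aside (not part of the patch): coalesce() starts with end = off + sizeof(struct tdb_used_record) + data_len, i.e. just past the current record's data, and pushes end forward over each adjacent free record it manages to lock and merge. The sketch below shows how a caller is expected to read the new return value; it is illustrative only, mirroring the lock_and_alloc() change further down in this patch:

        tdb_len_t coal = coalesce(tdb, off, b_off, data_len);
        if (TDB_OFF_IS_ERR(coal)) {
                /* Error: coalesce() has already dropped the bucket lock. */
                return coal;
        } else if (coal > 0) {
                /* Merged "coal" usable bytes; the bucket lock was dropped,
                 * so the free-list scan must restart. */
        } else {
                /* coal == 0: nothing adjacent was free; lock still held. */
        }
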
@@ -376,7 +376,7 @@ static tdb_bool_err coalesce(struct tdb_context *tdb,
                /* We may be violating lock order here, so best effort. */
                if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT)
                    != TDB_SUCCESS) {
-                       add_stat(tdb, alloc_coalesce_lockfail, 1);
+                       tdb->stats.alloc_coalesce_lockfail++;
                        break;
                }
 
@@ -388,14 +388,14 @@ static tdb_bool_err coalesce(struct tdb_context *tdb,
                }
 
                if (unlikely(frec_magic(&rec) != TDB_FREE_MAGIC)) {
-                       add_stat(tdb, alloc_coalesce_race, 1);
+                       tdb->stats.alloc_coalesce_race++;
                        tdb_unlock_free_bucket(tdb, nb_off);
                        break;
                }
 
                if (unlikely(frec_ftable(&rec) != ftable)
                    || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) {
-                       add_stat(tdb, alloc_coalesce_race, 1);
+                       tdb->stats.alloc_coalesce_race++;
                        tdb_unlock_free_bucket(tdb, nb_off);
                        break;
                }
@@ -409,12 +409,12 @@ static tdb_bool_err coalesce(struct tdb_context *tdb,
 
                end += sizeof(struct tdb_used_record) + frec_len(&rec);
                tdb_unlock_free_bucket(tdb, nb_off);
-               add_stat(tdb, alloc_coalesce_num_merged, 1);
+               tdb->stats.alloc_coalesce_num_merged++;
        }
 
        /* Didn't find any adjacent free? */
        if (end == off + sizeof(struct tdb_used_record) + data_len)
-               return false;
+               return 0;
 
        /* OK, expand initial record */
        ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
@@ -446,14 +446,15 @@ static tdb_bool_err coalesce(struct tdb_context *tdb,
                goto err;
        }
 
-       add_stat(tdb, alloc_coalesce_succeeded, 1);
+       tdb->stats.alloc_coalesce_succeeded++;
        tdb_unlock_free_bucket(tdb, b_off);
 
        ecode = add_free_record(tdb, off, end - off);
        if (ecode != TDB_SUCCESS) {
                return ecode;
        }
-       return true;
+       /* Return usable length. */
+       return end - off - sizeof(struct tdb_used_record);
 
 err:
        /* To unify error paths, we *always* unlock bucket on error. */
@@ -476,7 +477,7 @@ static tdb_off_t lock_and_alloc(struct tdb_context *tdb,
        size_t size = adjust_size(keylen, datalen);
        enum TDB_ERROR ecode;
 
-       add_stat(tdb, allocs, 1);
+       tdb->stats.allocs++;
 again:
        b_off = bucket_off(ftable_off, bucket);
 
@@ -506,9 +507,8 @@ again:
 
        while (off) {
                const struct tdb_free_record *r;
-               tdb_len_t len;
+               tdb_len_t len, coal;
                tdb_off_t next;
-               int coal;
 
                r = tdb_access_read(tdb, off, sizeof(*r), true);
                if (TDB_PTR_IS_ERR(r)) {
@@ -544,14 +544,14 @@ again:
 
                /* Since we're going slow anyway, try coalescing here. */
                coal = coalesce(tdb, off, b_off, len);
-               if (coal == 1) {
-                       /* This has unlocked list, restart. */
-                       goto again;
-               }
-               if (coal < 0) {
+               if (TDB_OFF_IS_ERR(coal)) {
                        /* This has already unlocked on error. */
                        return coal;
                }
+               if (coal > 0) {
+                       /* This has unlocked list, restart. */
+                       goto again;
+               }
                off = next;
        }
 
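
Aside (not part of the patch): since coal is now a tdb_len_t rather than an int, the old "coal < 0" test no longer applies; errors are presumably encoded as out-of-range values, which is why the caller above switches to TDB_OFF_IS_ERR(coal) and treats any other non-zero value as "something was coalesced, the list was unlocked, restart the scan".
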
@@ -596,7 +596,7 @@ again:
                /* Bucket of leftover will be <= current bucket, so nested
                 * locking is allowed. */
                if (leftover) {
-                       add_stat(tdb, alloc_leftover, 1);
+                       tdb->stats.alloc_leftover++;
                        ecode = add_free_record(tdb,
                                                best_off + sizeof(rec)
                                                + frec_len(&best) - leftover,
@@ -649,9 +649,9 @@ static tdb_off_t get_free(struct tdb_context *tdb,
                                return off;
                        if (off != 0) {
                                if (b == start_b)
-                                       add_stat(tdb, alloc_bucket_exact, 1);
+                                       tdb->stats.alloc_bucket_exact++;
                                if (b == TDB_FREE_BUCKETS - 1)
-                                       add_stat(tdb, alloc_bucket_max, 1);
+                                       tdb->stats.alloc_bucket_max++;
                                /* Worked?  Stay using this list. */
                                tdb->ftable_off = ftable_off;
                                tdb->ftable = ftable;
@@ -713,13 +713,10 @@ enum TDB_ERROR set_header(struct tdb_context *tdb,
 /* Expand the database. */
 static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
 {
-       uint64_t old_size;
+       uint64_t old_size, rec_size, map_size;
        tdb_len_t wanted;
        enum TDB_ERROR ecode;
 
-       /* We need room for the record header too. */
-       wanted = sizeof(struct tdb_used_record) + size;
-
        /* Need to hold a hash lock to expand DB: transactions rely on it. */
        if (!(tdb->flags & TDB_NOLOCK)
            && !tdb->file->allrecord_lock.count && !tdb_has_hash_locks(tdb)) {
@@ -727,14 +724,6 @@ static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
                                  "tdb_expand: must hold lock during expand");
        }
 
-       /* always make room for at least 100 more records, and at
-           least 25% more space. */
-       if (size * TDB_EXTENSION_FACTOR > tdb->file->map_size / 4)
-               wanted = size * TDB_EXTENSION_FACTOR;
-       else
-               wanted = tdb->file->map_size / 4;
-       wanted = adjust_size(0, wanted);
-
        /* Only one person can expand file at a time. */
        ecode = tdb_lock_expand(tdb, F_WRLCK);
        if (ecode != TDB_SUCCESS) {
@@ -749,6 +738,32 @@ static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
                return TDB_SUCCESS;
        }
 
+       /* Limit size in order to avoid using up huge amounts of memory for
+        * in-memory tdbs if an oddball huge record creeps in. */
+       if (size > 100 * 1024) {
+               rec_size = size * 2;
+       } else {
+               rec_size = size * 100;
+       }
+
+       /* Always make room for at least rec_size more bytes, and grow by
+          at least 25% of the current size while the DB is smaller than
+          100MiB; above that, grow it by only 10%. */
+       if (old_size > 100 * 1024 * 1024) {
+               map_size = old_size / 10;
+       } else {
+               map_size = old_size / 4;
+       }
+
+       if (map_size > rec_size) {
+               wanted = map_size;
+       } else {
+               wanted = rec_size;
+       }
+
+       /* We need room for the record header too. */
+       wanted = adjust_size(0, sizeof(struct tdb_used_record) + wanted);
+
        ecode = tdb->methods->expand_file(tdb, wanted);
        if (ecode != TDB_SUCCESS) {
                tdb_unlock_expand(tdb, F_WRLCK);
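
Aside (not part of the patch): the new sizing policy reads naturally as a small pure function. The sketch below restates it; the helper name grow_hint() and the worked numbers are illustrative only, and as in the code above the tdb_used_record header and adjust_size() rounding are applied on top of the value it returns:

        #include <stdint.h>

        /* Illustrative restatement of the growth heuristic above: "size" is
         * the record size that triggered the expansion, "old_size" is the
         * current file size. */
        static uint64_t grow_hint(uint64_t size, uint64_t old_size)
        {
                uint64_t rec_size, map_size;

                /* Huge records: just double the request; otherwise leave
                 * room for roughly 100 records of this size. */
                if (size > 100 * 1024)
                        rec_size = size * 2;
                else
                        rec_size = size * 100;

                /* Grow by 25% of the file while it is under 100MiB,
                 * and by 10% once it is larger. */
                if (old_size > 100 * 1024 * 1024)
                        map_size = old_size / 10;
                else
                        map_size = old_size / 4;

                return map_size > rec_size ? map_size : rec_size;
        }

        /* Example: size = 1 KiB, old_size = 10 MiB:
         *   rec_size = 1 KiB * 100            = 100 KiB
         *   map_size = 10 MiB / 4             = 2.5 MiB
         *   grow_hint()                       = 2.5 MiB
         * so the file grows by about 2.5 MiB plus the record header,
         * rounded up by adjust_size(). */
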
@@ -758,7 +773,7 @@ static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
        /* We need to drop this lock before adding free record. */
        tdb_unlock_expand(tdb, F_WRLCK);
 
-       add_stat(tdb, expands, 1);
+       tdb->stats.expands++;
        return add_free_record(tdb, old_size, wanted);
 }