X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ftdb2%2Ffree.c;h=658adec13bd4098859a9b7553994d0d0080d1391;hp=7482daa2153de5f61773353a4607eb8646a895d8;hb=818ed29730b030ce79855fc35c212b51adff3180;hpb=024a5647e6c81735a93d826b56db0db4bf86fab8 diff --git a/ccan/tdb2/free.c b/ccan/tdb2/free.c index 7482daa2..658adec1 100644 --- a/ccan/tdb2/free.c +++ b/ccan/tdb2/free.c @@ -65,8 +65,8 @@ enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb) unsigned int rnd, max = 0, count = 0; tdb_off_t off; - tdb->ftable_off = off = first_ftable(tdb); - tdb->ftable = 0; + tdb->tdb2.ftable_off = off = first_ftable(tdb); + tdb->tdb2.ftable = 0; while (off) { if (TDB_OFF_IS_ERR(off)) { @@ -75,8 +75,8 @@ enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb) rnd = random(); if (rnd >= max) { - tdb->ftable_off = off; - tdb->ftable = count; + tdb->tdb2.ftable_off = off; + tdb->tdb2.ftable = count; max = rnd; } @@ -218,7 +218,7 @@ static enum TDB_ERROR enqueue_in_free(struct tdb_context *tdb, return head; /* We only need to set ftable_and_len; rest is set in enqueue_in_free */ - new.ftable_and_len = ((uint64_t)tdb->ftable << (64 - TDB_OFF_UPPER_STEAL)) + new.ftable_and_len = ((uint64_t)tdb->tdb2.ftable << (64 - TDB_OFF_UPPER_STEAL)) | len; /* new->next = head. */ @@ -287,8 +287,8 @@ static tdb_off_t ftable_offset(struct tdb_context *tdb, unsigned int ftable) tdb_off_t off; unsigned int i; - if (likely(tdb->ftable == ftable)) - return tdb->ftable_off; + if (likely(tdb->tdb2.ftable == ftable)) + return tdb->tdb2.ftable_off; off = first_ftable(tdb); for (i = 0; i < ftable; i++) { @@ -370,8 +370,10 @@ static tdb_len_t coalesce(struct tdb_context *tdb, } /* Did we just mess up a record you were hoping to use? */ - if (end == *protect) + if (end == *protect) { + tdb->stats.alloc_coalesce_iterate_clash++; *protect = TDB_ERR_NOEXIST; + } ecode = remove_from_list(tdb, nb_off, end, &rec); check_list(tdb, nb_off); @@ -390,8 +392,10 @@ static tdb_len_t coalesce(struct tdb_context *tdb, return 0; /* Before we expand, check this isn't one you wanted protected? */ - if (off == *protect) + if (off == *protect) { *protect = TDB_ERR_EXISTS; + tdb->stats.alloc_coalesce_iterate_clash++; + } /* OK, expand initial record */ ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec)); @@ -416,6 +420,7 @@ static tdb_len_t coalesce(struct tdb_context *tdb, ecode = add_free_record(tdb, off, end - off, TDB_LOCK_NOWAIT, false); if (ecode != TDB_SUCCESS) { /* Need to drop lock. Can't rely on anything stable. */ + tdb->stats.alloc_coalesce_lockfail++; *protect = TDB_ERR_CORRUPT; /* We have to drop this to avoid deadlocks, so make sure record @@ -431,7 +436,6 @@ static tdb_len_t coalesce(struct tdb_context *tdb, goto err; } - tdb->stats.alloc_coalesce_succeeded++; tdb_unlock_free_bucket(tdb, b_off); ecode = add_free_record(tdb, off, end - off, TDB_LOCK_WAIT, @@ -443,6 +447,7 @@ static tdb_len_t coalesce(struct tdb_context *tdb, /* For simplicity, we always drop lock if they can't continue */ tdb_unlock_free_bucket(tdb, b_off); } + tdb->stats.alloc_coalesce_succeeded++; /* Return usable length. */ return end - off - sizeof(struct tdb_used_record); @@ -455,7 +460,9 @@ err: /* List is locked: we unlock it. */ static enum TDB_ERROR coalesce_list(struct tdb_context *tdb, - tdb_off_t ftable_off, tdb_off_t b_off) + tdb_off_t ftable_off, + tdb_off_t b_off, + unsigned int limit) { enum TDB_ERROR ecode; tdb_off_t off; @@ -465,10 +472,10 @@ static enum TDB_ERROR coalesce_list(struct tdb_context *tdb, ecode = off; goto unlock_err; } - /* A little bit of paranoia */ + /* A little bit of paranoia: counter should be 0. */ off &= TDB_OFF_MASK; - while (off) { + while (off && limit--) { struct tdb_free_record rec; tdb_len_t coal; tdb_off_t next; @@ -487,9 +494,85 @@ static enum TDB_ERROR coalesce_list(struct tdb_context *tdb, /* Coalescing had to unlock, so stop. */ return TDB_SUCCESS; } + /* Keep going if we're doing well... */ + limit += size_to_bucket(coal / 16 + TDB_MIN_DATA_LEN); off = next; } + /* Now, move those elements to the tail of the list so we get something + * else next time. */ + if (off) { + struct tdb_free_record oldhrec, newhrec, oldtrec, newtrec; + tdb_off_t oldhoff, oldtoff, newtoff; + + /* The record we were up to is the new head. */ + ecode = tdb_read_convert(tdb, off, &newhrec, sizeof(newhrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* Get the new tail. */ + newtoff = frec_prev(&newhrec); + ecode = tdb_read_convert(tdb, newtoff, &newtrec, + sizeof(newtrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* Get the old head. */ + oldhoff = tdb_read_off(tdb, b_off); + if (TDB_OFF_IS_ERR(oldhoff)) { + ecode = oldhoff; + goto unlock_err; + } + + /* This could happen if they all coalesced away. */ + if (oldhoff == off) + goto out; + + ecode = tdb_read_convert(tdb, oldhoff, &oldhrec, + sizeof(oldhrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* Get the old tail. */ + oldtoff = frec_prev(&oldhrec); + ecode = tdb_read_convert(tdb, oldtoff, &oldtrec, + sizeof(oldtrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* Old tail's next points to old head. */ + oldtrec.next = oldhoff; + + /* Old head's prev points to old tail. */ + oldhrec.magic_and_prev + = (TDB_FREE_MAGIC << (64 - TDB_OFF_UPPER_STEAL)) + | oldtoff; + + /* New tail's next is 0. */ + newtrec.next = 0; + + /* Write out the modified versions. */ + ecode = tdb_write_convert(tdb, oldtoff, &oldtrec, + sizeof(oldtrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + ecode = tdb_write_convert(tdb, oldhoff, &oldhrec, + sizeof(oldhrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + ecode = tdb_write_convert(tdb, newtoff, &newtrec, + sizeof(newtrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* And finally link in new head. */ + ecode = tdb_write_off(tdb, b_off, off); + if (ecode != TDB_SUCCESS) + goto unlock_err; + } +out: tdb_unlock_free_bucket(tdb, b_off); return TDB_SUCCESS; @@ -512,7 +595,7 @@ enum TDB_ERROR add_free_record(struct tdb_context *tdb, len = len_with_header - sizeof(struct tdb_used_record); - b_off = bucket_off(tdb->ftable_off, size_to_bucket(len)); + b_off = bucket_off(tdb->tdb2.ftable_off, size_to_bucket(len)); ecode = tdb_lock_free_bucket(tdb, b_off, waitflag); if (ecode != TDB_SUCCESS) { return ecode; @@ -523,7 +606,7 @@ enum TDB_ERROR add_free_record(struct tdb_context *tdb, /* Coalescing unlocks free list. */ if (!ecode && coalesce) - ecode = coalesce_list(tdb, tdb->ftable_off, b_off); + ecode = coalesce_list(tdb, tdb->tdb2.ftable_off, b_off, 2); else tdb_unlock_free_bucket(tdb, b_off); return ecode; @@ -669,7 +752,7 @@ static tdb_off_t lock_and_alloc(struct tdb_context *tdb, /* For futureproofing, we put a 0 in any unused space. */ if (rec_extra_padding(&rec)) { - ecode = tdb->methods->twrite(tdb, best_off + sizeof(rec) + ecode = tdb->tdb2.io->twrite(tdb, best_off + sizeof(rec) + keylen + datalen, "", 1); if (ecode != TDB_SUCCESS) { goto unlock_err; @@ -717,9 +800,9 @@ static tdb_off_t get_free(struct tdb_context *tdb, else start_b = size_to_bucket(adjust_size(keylen, datalen)); - ftable_off = tdb->ftable_off; - ftable = tdb->ftable; - while (!wrapped || ftable_off != tdb->ftable_off) { + ftable_off = tdb->tdb2.ftable_off; + ftable = tdb->tdb2.ftable; + while (!wrapped || ftable_off != tdb->tdb2.ftable_off) { /* Start at exact size bucket, and search up... */ for (b = find_free_head(tdb, ftable_off, start_b); b < TDB_FREE_BUCKETS; @@ -736,8 +819,8 @@ static tdb_off_t get_free(struct tdb_context *tdb, if (b == TDB_FREE_BUCKETS - 1) tdb->stats.alloc_bucket_max++; /* Worked? Stay using this list. */ - tdb->ftable_off = ftable_off; - tdb->ftable = ftable; + tdb->tdb2.ftable_off = ftable_off; + tdb->tdb2.ftable = ftable; return off; } /* Didn't work. Try next bucket. */ @@ -815,7 +898,7 @@ static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size) /* Someone else may have expanded the file, so retry. */ old_size = tdb->file->map_size; - tdb->methods->oob(tdb, tdb->file->map_size + 1, true); + tdb->tdb2.io->oob(tdb, tdb->file->map_size + 1, true); if (tdb->file->map_size != old_size) { tdb_unlock_expand(tdb, F_WRLCK); return TDB_SUCCESS; @@ -847,7 +930,7 @@ static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size) /* We need room for the record header too. */ wanted = adjust_size(0, sizeof(struct tdb_used_record) + wanted); - ecode = tdb->methods->expand_file(tdb, wanted); + ecode = tdb->tdb2.io->expand_file(tdb, wanted); if (ecode != TDB_SUCCESS) { tdb_unlock_expand(tdb, F_WRLCK); return ecode; @@ -867,7 +950,7 @@ tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen, tdb_off_t off; /* We can't hold pointers during this: we could unmap! */ - assert(!tdb->direct_access); + assert(!tdb->tdb2.direct_access); for (;;) { enum TDB_ERROR ecode;