static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve);
+ u64 num_bytes, int reserve,
+ int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
u64 num_entries =
atomic_read(&trans->transaction->delayed_refs.num_entries);
u64 avg_runtime;
+ u64 val;
smp_mb();
avg_runtime = fs_info->avg_delayed_ref_runtime;
+ val = num_entries * avg_runtime;
if (num_entries * avg_runtime >= NSEC_PER_SEC)
return 1;
+ if (val >= NSEC_PER_SEC / 2)
+ return 2;
return btrfs_check_space_for_delayed_refs(trans, root);
}
+struct async_delayed_refs {
+ struct btrfs_root *root;
+ int count;
+ int error;
+ int sync;
+ struct completion wait;
+ struct btrfs_work work;
+};
+
+static void delayed_ref_async_start(struct btrfs_work *work)
+{
+ struct async_delayed_refs *async;
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ async = container_of(work, struct async_delayed_refs, work);
+
+ trans = btrfs_join_transaction(async->root);
+ if (IS_ERR(trans)) {
+ async->error = PTR_ERR(trans);
+ goto done;
+ }
+
+ /*
+ * trans->sync means that when we call end_transaciton, we won't
+ * wait on delayed refs
+ */
+ trans->sync = true;
+ ret = btrfs_run_delayed_refs(trans, async->root, async->count);
+ if (ret)
+ async->error = ret;
+
+ ret = btrfs_end_transaction(trans, async->root);
+ if (ret && !async->error)
+ async->error = ret;
+done:
+ if (async->sync)
+ complete(&async->wait);
+ else
+ kfree(async);
+}
+
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+ unsigned long count, int wait)
+{
+ struct async_delayed_refs *async;
+ int ret;
+
+ async = kmalloc(sizeof(*async), GFP_NOFS);
+ if (!async)
+ return -ENOMEM;
+
+ async->root = root->fs_info->tree_root;
+ async->count = count;
+ async->error = 0;
+ if (wait)
+ async->sync = 1;
+ else
+ async->sync = 0;
+ init_completion(&async->wait);
+
+ btrfs_init_work(&async->work, delayed_ref_async_start,
+ NULL, NULL);
+
+ btrfs_queue_work(root->fs_info->extent_workers, &async->work);
+
+ if (wait) {
+ wait_for_completion(&async->wait);
+ ret = async->error;
+ kfree(async);
+ return ret;
+ }
+ return 0;
+}
+
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED ||
- !btrfs_test_opt(root, SPACE_CACHE)) {
+ !btrfs_test_opt(root, SPACE_CACHE) ||
+ block_group->delalloc_bytes) {
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
return ret;
}
- for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
INIT_LIST_HEAD(&found->block_groups[i]);
- kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
- }
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
* @cache: The cache we are manipulating
* @num_bytes: The number of bytes in question
* @reserve: One of the reservation enums
+ * @delalloc: The blocks are allocated for the delalloc write
*
* This is called by the allocator when it reserves space, or by somebody who is
* freeing space that was never actually used on disk. For example if you
* succeeds.
*/
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve)
+ u64 num_bytes, int reserve, int delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
int ret = 0;
num_bytes, 0);
space_info->bytes_may_use -= num_bytes;
}
+
+ if (delalloc)
+ cache->delalloc_bytes += num_bytes;
}
} else {
if (cache->ro)
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
+
+ if (delalloc)
+ cache->delalloc_bytes -= num_bytes;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
struct btrfs_caching_control *next;
struct btrfs_caching_control *caching_ctl;
struct btrfs_block_group_cache *cache;
- struct btrfs_space_info *space_info;
down_write(&fs_info->commit_root_sem);
up_write(&fs_info->commit_root_sem);
- list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
- percpu_counter_set(&space_info->total_bytes_pinned, 0);
-
update_global_block_rsv(fs_info);
}
spin_lock(&cache->lock);
cache->pinned -= len;
space_info->bytes_pinned -= len;
+ percpu_counter_add(&space_info->total_bytes_pinned, -len);
if (cache->ro) {
space_info->bytes_readonly += len;
readonly = true;
refs = btrfs_extent_refs(leaf, ei);
if (refs < refs_to_drop) {
btrfs_err(info, "trying to drop %d refs but we only have %Lu "
- "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
+ "for bytenr %Lu", refs_to_drop, refs, bytenr);
ret = -EINVAL;
btrfs_abort_transaction(trans, extent_root, ret);
goto out;
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
btrfs_add_free_space(cache, buf->start, buf->len);
- btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
+ btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
}
LOOP_NO_EMPTY_SIZE = 3,
};
+static inline void
+btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
+ int delalloc)
+{
+ if (delalloc)
+ down_read(&cache->data_rwsem);
+}
+
+static inline void
+btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
+ int delalloc)
+{
+ btrfs_get_block_group(cache);
+ if (delalloc)
+ down_read(&cache->data_rwsem);
+}
+
+static struct btrfs_block_group_cache *
+btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster,
+ int delalloc)
+{
+ struct btrfs_block_group_cache *used_bg;
+ bool locked = false;
+again:
+ spin_lock(&cluster->refill_lock);
+ if (locked) {
+ if (used_bg == cluster->block_group)
+ return used_bg;
+
+ up_read(&used_bg->data_rwsem);
+ btrfs_put_block_group(used_bg);
+ }
+
+ used_bg = cluster->block_group;
+ if (!used_bg)
+ return NULL;
+
+ if (used_bg == block_group)
+ return used_bg;
+
+ btrfs_get_block_group(used_bg);
+
+ if (!delalloc)
+ return used_bg;
+
+ if (down_read_trylock(&used_bg->data_rwsem))
+ return used_bg;
+
+ spin_unlock(&cluster->refill_lock);
+ down_read(&used_bg->data_rwsem);
+ locked = true;
+ goto again;
+}
+
+static inline void
+btrfs_release_block_group(struct btrfs_block_group_cache *cache,
+ int delalloc)
+{
+ if (delalloc)
+ up_read(&cache->data_rwsem);
+ btrfs_put_block_group(cache);
+}
+
/*
* walks the btree of allocated extents and find a hole of a given size.
* The key ins is changed to record the hole:
static noinline int find_free_extent(struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
u64 hint_byte, struct btrfs_key *ins,
- u64 flags)
+ u64 flags, int delalloc)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
up_read(&space_info->groups_sem);
} else {
index = get_block_group_index(block_group);
+ btrfs_lock_block_group(block_group, delalloc);
goto have_block_group;
}
} else if (block_group) {
u64 offset;
int cached;
- btrfs_get_block_group(block_group);
+ btrfs_grab_block_group(block_group, delalloc);
search_start = block_group->key.objectid;
/*
* the refill lock keeps out other
* people trying to start a new cluster
*/
- spin_lock(&last_ptr->refill_lock);
- used_block_group = last_ptr->block_group;
- if (used_block_group != block_group &&
- (!used_block_group ||
- used_block_group->ro ||
- !block_group_bits(used_block_group, flags)))
+ used_block_group = btrfs_lock_cluster(block_group,
+ last_ptr,
+ delalloc);
+ if (!used_block_group)
goto refill_cluster;
- if (used_block_group != block_group)
- btrfs_get_block_group(used_block_group);
+ if (used_block_group != block_group &&
+ (used_block_group->ro ||
+ !block_group_bits(used_block_group, flags)))
+ goto release_cluster;
offset = btrfs_alloc_from_cluster(used_block_group,
last_ptr,
used_block_group,
search_start, num_bytes);
if (used_block_group != block_group) {
- btrfs_put_block_group(block_group);
+ btrfs_release_block_group(block_group,
+ delalloc);
block_group = used_block_group;
}
goto checks;
}
WARN_ON(last_ptr->block_group != used_block_group);
- if (used_block_group != block_group)
- btrfs_put_block_group(used_block_group);
-refill_cluster:
+release_cluster:
/* If we are on LOOP_NO_EMPTY_SIZE, we can't
* set up a new clusters, so lets just skip it
* and let the allocator find whatever block
* succeeding in the unclustered
* allocation. */
if (loop >= LOOP_NO_EMPTY_SIZE &&
- last_ptr->block_group != block_group) {
+ used_block_group != block_group) {
spin_unlock(&last_ptr->refill_lock);
+ btrfs_release_block_group(used_block_group,
+ delalloc);
goto unclustered_alloc;
}
*/
btrfs_return_cluster_to_free_space(NULL, last_ptr);
+ if (used_block_group != block_group)
+ btrfs_release_block_group(used_block_group,
+ delalloc);
+refill_cluster:
if (loop >= LOOP_NO_EMPTY_SIZE) {
spin_unlock(&last_ptr->refill_lock);
goto unclustered_alloc;
BUG_ON(offset > search_start);
ret = btrfs_update_reserved_bytes(block_group, num_bytes,
- alloc_type);
+ alloc_type, delalloc);
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
trace_btrfs_reserve_extent(orig_root, block_group,
search_start, num_bytes);
- btrfs_put_block_group(block_group);
+ btrfs_release_block_group(block_group, delalloc);
break;
loop:
failed_cluster_refill = false;
failed_alloc = false;
BUG_ON(index != get_block_group_index(block_group));
- btrfs_put_block_group(block_group);
+ btrfs_release_block_group(block_group, delalloc);
}
up_read(&space_info->groups_sem);
int btrfs_reserve_extent(struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, int is_data)
+ struct btrfs_key *ins, int is_data, int delalloc)
{
bool final_tried = false;
u64 flags;
again:
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
- flags);
+ flags, delalloc);
if (ret == -ENOSPC) {
if (!final_tried && ins->offset) {
}
static int __btrfs_free_reserved_extent(struct btrfs_root *root,
- u64 start, u64 len, int pin)
+ u64 start, u64 len,
+ int pin, int delalloc)
{
struct btrfs_block_group_cache *cache;
int ret = 0;
pin_down_extent(root, cache, start, len, 1);
else {
btrfs_add_free_space(cache, start, len);
- btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
+ btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
}
btrfs_put_block_group(cache);
}
int btrfs_free_reserved_extent(struct btrfs_root *root,
- u64 start, u64 len)
+ u64 start, u64 len, int delalloc)
{
- return __btrfs_free_reserved_extent(root, start, len, 0);
+ return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
}
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
u64 start, u64 len)
{
- return __btrfs_free_reserved_extent(root, start, len, 1);
+ return __btrfs_free_reserved_extent(root, start, len, 1, 0);
}
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
return -EINVAL;
ret = btrfs_update_reserved_bytes(block_group, ins->offset,
- RESERVE_ALLOC_NO_ACCOUNT);
+ RESERVE_ALLOC_NO_ACCOUNT, 0);
BUG_ON(ret); /* logic error */
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
return ERR_CAST(block_rsv);
ret = btrfs_reserve_extent(root, blocksize, blocksize,
- empty_size, hint, &ins, 0);
+ empty_size, hint, &ins, 0, 0);
if (ret) {
unuse_block_rsv(root->fs_info, block_rsv, blocksize);
return ERR_PTR(ret);
list_del(&space_info->list);
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
struct kobject *kobj;
- kobj = &space_info->block_group_kobjs[i];
- if (kobj->parent) {
+ kobj = space_info->block_group_kobjs[i];
+ space_info->block_group_kobjs[i] = NULL;
+ if (kobj) {
kobject_del(kobj);
kobject_put(kobj);
}
up_write(&space_info->groups_sem);
if (first) {
- struct kobject *kobj = &space_info->block_group_kobjs[index];
+ struct raid_kobject *rkobj;
int ret;
- kobject_get(&space_info->kobj); /* put in release */
- ret = kobject_add(kobj, &space_info->kobj, "%s",
- get_raid_name(index));
+ rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+ if (!rkobj)
+ goto out_err;
+ rkobj->raid_type = index;
+ kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
+ ret = kobject_add(&rkobj->kobj, &space_info->kobj,
+ "%s", get_raid_name(index));
if (ret) {
- pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
- kobject_put(&space_info->kobj);
+ kobject_put(&rkobj->kobj);
+ goto out_err;
}
+ space_info->block_group_kobjs[index] = &rkobj->kobj;
}
+
+ return;
+out_err:
+ pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
}
static struct btrfs_block_group_cache *
start);
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
+ init_rwsem(&cache->data_rwsem);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->new_bg_list);
struct btrfs_root *tree_root = root->fs_info->tree_root;
struct btrfs_key key;
struct inode *inode;
+ struct kobject *kobj = NULL;
int ret;
int index;
int factor;
*/
list_del_init(&block_group->list);
if (list_empty(&block_group->space_info->block_groups[index])) {
- kobject_del(&block_group->space_info->block_group_kobjs[index]);
- kobject_put(&block_group->space_info->block_group_kobjs[index]);
+ kobj = block_group->space_info->block_group_kobjs[index];
+ block_group->space_info->block_group_kobjs[index] = NULL;
clear_avail_alloc_bits(root->fs_info, block_group->flags);
}
up_write(&block_group->space_info->groups_sem);
+ if (kobj) {
+ kobject_del(kobj);
+ kobject_put(kobj);
+ }
if (block_group->cached == BTRFS_CACHE_STARTED)
wait_block_group_cache_done(block_group);