/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdbool.h>
#include "kerncompat.h"
#include "kernel-shared/extent_io.h"
#include "kernel-lib/list.h"
#include "kernel-lib/raid56.h"
#include "kernel-shared/ctree.h"
#include "kernel-shared/volumes.h"
#include "kernel-shared/disk-io.h"
#include "common/utils.h"
#include "common/device-utils.h"
#include "common/internal.h"

void extent_io_tree_init(struct extent_io_tree *tree)
{
	cache_tree_init(&tree->state);
	cache_tree_init(&tree->cache);
	INIT_LIST_HEAD(&tree->lru);
	tree->cache_size = 0;
	tree->max_cache_size = (u64)total_memory() / 4;
}

/*
 * Initialize @tree exactly like extent_io_tree_init() does, then replace the
 * default cache size limit with the caller supplied @max_cache_size.
 */
void extent_io_tree_init_cache_max(struct extent_io_tree *tree,
				   u64 max_cache_size)
{
	extent_io_tree_init(tree);
	tree->max_cache_size = max_cache_size;
}

/*
 * Allocate an extent_state that holds one reference, has no bits set, a zero
 * private value and a zero cache node objectid.
 *
 * The range fields (start/end and the cache node range) are not initialized
 * here; callers fill them in before the state is inserted into a tree.
 *
 * Returns NULL if malloc() fails.
 */
static struct extent_state *alloc_extent_state(void)
{
	struct extent_state *es = malloc(sizeof(*es));

	if (es) {
		es->xprivate = 0;
		es->state = 0;
		es->refs = 1;
		es->cache_node.objectid = 0;
	}
	return es;
}

/*
 * Drop one reference on @state and free it once no reference is left.
 * Dropping more references than were taken trips the BUG_ON().
 */
static void btrfs_free_extent_state(struct extent_state *state)
{
	--state->refs;
	BUG_ON(state->refs < 0);
	if (state->refs != 0)
		return;
	free(state);
}

/*
 * Callback for cache_tree_free_extents(): map the cache node back to the
 * extent_state embedding it and drop a reference on that state.
 */
static void free_extent_state_func(struct cache_extent *cache)
{
	btrfs_free_extent_state(container_of(cache, struct extent_state,
					     cache_node));
}

static void free_extent_buffer_final(struct extent_buffer *eb);

/*
 * Tear down @tree: release every extent buffer still linked on the LRU list
 * and then free all extent states.
 *
 * A buffer that still holds references is reported on stderr as a leak and
 * has one reference dropped through free_extent_buffer_nocache(); the loop
 * keeps looking at the head of the list, so such a buffer is reported once
 * per outstanding reference until it is finally freed.
 */
void extent_io_tree_cleanup(struct extent_io_tree *tree)
{
	while (!list_empty(&tree->lru)) {
		struct extent_buffer *head;

		head = list_entry(tree->lru.next, struct extent_buffer, lru);
		if (!head->refs) {
			free_extent_buffer_final(head);
			continue;
		}
		fprintf(stderr,
			"extent buffer leak: start %llu len %u\n",
			(unsigned long long)head->start, head->len);
		free_extent_buffer_nocache(head);
	}

	cache_tree_free_extents(&tree->state, free_extent_state_func);
}

/*
 * Refresh the cache node range of @state from its start/end fields.  The
 * node size is computed as end + 1 - start, i.e. state->end is inclusive.
 */
static inline void update_extent_state(struct extent_state *state)
{
	state->cache_node.start = state->start;
	state->cache_node.size = state->end + 1 - state->start;
}

/*
 * Try to merge @state with its direct neighbours in @tree.
 *
 * A neighbour is merged when it is byte-adjacent to @state and carries
 * exactly the same bits.  States with any of the EXTENT_IOBITS set are never
 * merged.
 *
 * NOTE: when the following neighbour is merged, @state itself is removed from
 * the tree and has a reference dropped, so callers must not touch @state
 * after this call.
 *
 * Always returns 0.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct cache_extent *node;
	struct extent_state *neighbor;

	if (state->state & EXTENT_IOBITS)
		return 0;

	/* Absorb the preceding state: @state grows to the left. */
	node = prev_cache_extent(&state->cache_node);
	if (node) {
		neighbor = container_of(node, struct extent_state, cache_node);
		if (neighbor->state == state->state &&
		    neighbor->end == state->start - 1) {
			state->start = neighbor->start;
			update_extent_state(state);
			remove_cache_extent(&tree->state,
					    &neighbor->cache_node);
			btrfs_free_extent_state(neighbor);
		}
	}

	/* Fold @state into the following state: that one grows to the left. */
	node = next_cache_extent(&state->cache_node);
	if (node) {
		neighbor = container_of(node, struct extent_state, cache_node);
		if (neighbor->state == state->state &&
		    neighbor->start == state->end + 1) {
			neighbor->start = state->start;
			update_extent_state(neighbor);
			remove_cache_extent(&tree->state, &state->cache_node);
			btrfs_free_extent_state(state);
		}
	}
	return 0;
}

/*
 * Insert @state into @tree covering the inclusive range [@start, @end].
 *
 * @bits are OR-ed into the state before insertion, and afterwards the state
 * is offered to merge_state() so it may be merged with adjacent states (and
 * thereby freed).  An inverted range or a failing cache insertion trips a
 * BUG_ON().
 *
 * Always returns 0.
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	int err;

	BUG_ON(start > end);
	state->start = start;
	state->end = end;
	state->state |= bits;
	update_extent_state(state);

	err = insert_cache_extent(&tree->state, &state->cache_node);
	BUG_ON(err);

	merge_state(tree, state);
	return 0;
}

/*
 * Split @orig in two at offset @split.
 *
 * After the call @prealloc covers [orig's old start, @split - 1] and carries
 * a copy of @orig's bits, while @orig is shrunk in place to start at @split.
 * In other words the preallocated state becomes the FIRST half and is the one
 * newly inserted into the tree.
 *
 * A failing cache insertion trips BUG_ON(); always returns 0.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	const u64 first_half_start = orig->start;
	int err;

	/* Shrink the existing node first, it keeps its place in the tree. */
	orig->start = split;
	update_extent_state(orig);

	prealloc->state = orig->state;
	prealloc->start = first_half_start;
	prealloc->end = split - 1;
	update_extent_state(prealloc);

	err = insert_cache_extent(&tree->state, &prealloc->cache_node);
	BUG_ON(err);
	return 0;
}

/*
 * Clear @bits on a single extent state.
 *
 * If no bit is left afterwards the state is removed from @tree and has a
 * reference dropped; otherwise it is offered to merge_state().  Either way
 * the caller must not use @state after this call.
 *
 * Returns the subset of @bits that was set on @state before clearing.
 */
static int clear_state_bit(struct extent_io_tree *tree,
			    struct extent_state *state, int bits)
{
	const int cleared = state->state & bits;

	state->state &= ~bits;
	if (state->state != 0) {
		merge_state(tree, state);
		return cleared;
	}

	remove_cache_extent(&tree->state, &state->cache_node);
	btrfs_free_extent_state(state);
	return cleared;
}

/*
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to set
 */
void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
                              unsigned long pos, unsigned long len)
{
	u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
	const unsigned int size = pos + len;
	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);

	while (len >= bits_to_set) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		p++;
	}
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}

/*
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to clear
 */
void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
                                unsigned long pos, unsigned long len)
{
	u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
	const unsigned int size = pos + len;
	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);

	while (len >= bits_to_clear) {
		*p &= ~mask_to_clear;
		len -= bits_to_clear;
		bits_to_clear = BITS_PER_BYTE;
		mask_to_clear = ~0;
		p++;
	}
	if (len) {
		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
		*p &= ~mask_to_clear;
	}
}

/*
 * Clear @bits on the inclusive range [@start, @end] of @tree.
 *
 * States that only partially overlap the range are split first so that bits
 * outside the range are left untouched.  A state that ends up with no bits
 * set is removed from the tree by clear_state_bit().
 *
 * Returns -ENOMEM if an extent_state cannot be allocated (states processed
 * by earlier passes stay modified).  Otherwise returns the union of the
 * requested bits that were actually set on the states that got cleared, so
 * zero means none of @bits was set anywhere in the range.
 */
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	u64 last_end;
	int err;
	int set = 0;

again:
	/* A split consumes the spare state, so make sure one is available. */
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;
	state = container_of(node, struct extent_state, cache_node);
	if (state->start > end)
		goto out;
	/* Remember the end now, clear_state_bit() may free or merge @state. */
	last_end = state->end;

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		/* The spare state now lives in the tree as the first half. */
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set |= clear_state_bit(tree, state, bits);
			/* Avoid wrapping around when the state ends at U64_MAX. */
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		/* After the split, prealloc is the half inside the range. */
		set |= clear_state_bit(tree, prealloc, bits);
		prealloc = NULL;
		goto out;
	}

	/* The state lies completely inside the range. */
	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits);
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	goto search_again;
out:
	/* Release the spare state if no split consumed it. */
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return set;

search_again:
	if (start > end)
		goto out;
	goto again;
}

/*
 * Set @bits on the inclusive range [@start, @end] of @tree.
 *
 * Existing states that partially overlap the range are split so that only
 * the part inside the range gets the bits; holes inside the range are filled
 * with newly inserted states.  Adjacent states with identical bits are
 * merged again by merge_state().
 *
 * Returns 0 on success or -ENOMEM if an extent_state cannot be allocated
 * (parts of the range processed by earlier passes stay modified).
 */
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	int err = 0;
	u64 last_start;
	u64 last_end;
again:
	/* A split or insert consumes the spare state, keep one available. */
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		goto out;
	}

	state = container_of(node, struct extent_state, cache_node);
	/* Remember the range now, merge_state() may free @state. */
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		state->state |= bits;
		merge_state(tree, state);
		/* Avoid wrapping around when the state ends at U64_MAX. */
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		/* The spare state now lives in the tree as the first half. */
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 * | ---------- state ---------- |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	err = split_state(tree, state, prealloc, end + 1);
	BUG_ON(err == -EEXIST);

	/*
	 * split_state() turns @prealloc into the first half [start, end] and
	 * shrinks @state to [end + 1, ...], so the bits belong on @prealloc.
	 * Setting them on @state would mark the range *after* the requested
	 * one instead.
	 */
	prealloc->state |= bits;
	merge_state(tree, prealloc);
	prealloc = NULL;
out:
	/* Release the spare state if nothing consumed it. */
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return err;
search_again:
	if (start > end)
		goto out;
	goto again;
}

/*
 * Set EXTENT_DIRTY on the inclusive range [@start, @end] of @tree.
 * Returns whatever set_extent_bits() returns.
 */
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	return set_extent_bits(tree, start, end, EXTENT_DIRTY);
}

/*
 * Clear EXTENT_DIRTY on the inclusive range [@start, @end] of @tree.
 * Returns whatever clear_extent_bits() returns.
 */
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	return clear_extent_bits(tree, start, end, EXTENT_DIRTY);
}

/*
 * Find the first extent state that ends at or after @start and has at least
 * one of @bits set.
 *
 * On a match the (inclusive) range of that state is stored in @start_ret and
 * @end_ret and 0 is returned.  Note that the reported start may be smaller
 * than @start.  Returns 1 without touching the outputs if nothing matches.
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct cache_extent *node;

	for (node = search_cache_extent(&tree->state, start); node;
	     node = next_cache_extent(node)) {
		struct extent_state *es;

		es = container_of(node, struct extent_state, cache_node);
		if (es->end < start || !(es->state & bits))
			continue;

		*start_ret = es->start;
		*end_ret = es->end;
		return 0;
	}
	return 1;
}

/*
 * Test whether @bits are set in the inclusive range [@start, @end] of @tree.
 *
 * @filled != 0: return 1 only if the whole range is covered, without holes,
 *               by states that each have at least one of @bits set.
 * @filled == 0: return 1 as soon as any state intersecting the range has at
 *               least one of @bits set.
 *
 * Returns 0 otherwise.
 */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct cache_extent *node;
	int bitset = 0;

	node = search_cache_extent(&tree->state, start);
	while (node && start <= end) {
		state = container_of(node, struct extent_state, cache_node);

		/* A gap in front of this state means the range is not filled. */
		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->start > end)
			break;
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		/*
		 * A state reaching the very end of the address space covers
		 * the rest of any range.  Stop here, otherwise end + 1 wraps
		 * to 0 and the "no next state" check below would wrongly
		 * report the range as not filled.
		 */
		if (state->end == (u64)-1)
			break;
		start = state->end + 1;
		if (start > end)
			break;
		node = next_cache_extent(node);
		if (!node) {
			/* Ran out of states before reaching @end. */
			if (filled)
				bitset = 0;
			break;
		}
	}
	return bitset;
}

/*
 * Store @private in the extent state that begins exactly at @start.
 *
 * Returns 0 on success, or -ENOENT if the search finds no state or the state
 * found does not start at @start.
 */
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct cache_extent *node = search_cache_extent(&tree->state, start);
	struct extent_state *es;

	if (!node)
		return -ENOENT;

	es = container_of(node, struct extent_state, cache_node);
	if (es->start != start)
		return -ENOENT;

	es->xprivate = private;
	return 0;
}

/*
 * Fetch the private value of the extent state that begins exactly at @start
 * into @private.
 *
 * Returns 0 on success, or -ENOENT (leaving @private untouched) if the search
 * finds no state or the state found does not start at @start.
 */
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct cache_extent *node = search_cache_extent(&tree->state, start);
	struct extent_state *es;

	if (!node)
		return -ENOENT;

	es = container_of(node, struct extent_state, cache_node);
	if (es->start != start)
		return -ENOENT;

	*private = es->xprivate;
	return 0;
}

/*
 * Allocate a zeroed extent buffer with room for @blocksize bytes of data
 * right behind the structure.
 *
 * The buffer starts with a single reference, no flags, its cache node
 * describing [@bytenr, @bytenr + @blocksize) and empty lru/recow list heads.
 * It is NOT inserted into any cache tree or LRU list here.
 *
 * Returns NULL if the allocation fails.
 */
static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *info,
						   u64 bytenr, u32 blocksize)
{
	struct extent_buffer *buf;

	buf = calloc(1, sizeof(*buf) + blocksize);
	if (buf == NULL)
		return NULL;

	INIT_LIST_HEAD(&buf->lru);
	INIT_LIST_HEAD(&buf->recow);
	buf->fs_info = info;
	buf->cache_node.start = bytenr;
	buf->cache_node.size = blocksize;
	buf->start = bytenr;
	buf->len = blocksize;
	buf->flags = 0;
	buf->refs = 1;
	memset_extent_buffer(buf, 0, 0, blocksize);

	return buf;
}

/*
 * Create a private copy of @src: same fs_info, start, length and data.
 *
 * The copy is flagged EXTENT_BUFFER_DUMMY and is not inserted into the
 * extent buffer cache.  Returns NULL if the allocation fails.
 */
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
{
	struct extent_buffer *copy;

	copy = __alloc_extent_buffer(src->fs_info, src->start, src->len);
	if (copy) {
		copy_extent_buffer(copy, src, 0, 0, src->len);
		copy->flags |= EXTENT_BUFFER_DUMMY;
	}
	return copy;
}

/*
 * Really release @eb, which must not hold any reference any more.
 *
 * The buffer is unlinked from its LRU list.  Unless it is a dummy buffer it
 * is also removed from the cache tree of its filesystem and its length is
 * subtracted from the accounted cache size.
 */
static void free_extent_buffer_final(struct extent_buffer *eb)
{
	const bool cached = !(eb->flags & EXTENT_BUFFER_DUMMY);

	BUG_ON(eb->refs);
	list_del_init(&eb->lru);

	if (cached) {
		struct extent_io_tree *owner = &eb->fs_info->extent_cache;

		remove_cache_extent(&owner->cache, &eb->cache_node);
		BUG_ON(owner->cache_size < eb->len);
		owner->cache_size -= eb->len;
	}

	free(eb);
}

/*
 * Drop one reference on @eb.  NULL and error pointers are ignored.
 *
 * When the last reference goes away the buffer is taken off its recow list
 * and a warning is printed if it is still flagged dirty.  The memory is only
 * released right away for dummy buffers or when @free_now is set; otherwise
 * the buffer is left in place with zero references.
 */
static void free_extent_buffer_internal(struct extent_buffer *eb, bool free_now)
{
	if (!eb || IS_ERR(eb))
		return;

	eb->refs--;
	BUG_ON(eb->refs < 0);
	if (eb->refs != 0)
		return;

	if (eb->flags & EXTENT_DIRTY) {
		warning(
		"dirty eb leak (aborted trans): start %llu len %u",
			eb->start, eb->len);
	}
	list_del_init(&eb->recow);

	if (free_now || (eb->flags & EXTENT_BUFFER_DUMMY))
		free_extent_buffer_final(eb);
}

/*
 * Drop one reference on @eb via free_extent_buffer_internal() with free_now
 * unset: when the last reference is dropped, only dummy buffers are freed
 * immediately.
 */
void free_extent_buffer(struct extent_buffer *eb)
{
	free_extent_buffer_internal(eb, 0);
}

/*
 * Drop one reference on @eb via free_extent_buffer_internal() with free_now
 * set: the buffer is freed as soon as its last reference is dropped.
 */
void free_extent_buffer_nocache(struct extent_buffer *eb)
{
	free_extent_buffer_internal(eb, 1);
}

/*
 * Look up the cached extent buffer covering exactly
 * [@bytenr, @bytenr + @blocksize).
 *
 * On a hit the buffer is moved to the tail of the LRU list and returned with
 * an extra reference.  Returns NULL if there is no cached buffer for the
 * range or the one found does not match it exactly.
 */
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
					 u64 bytenr, u32 blocksize)
{
	struct cache_extent *hit;
	struct extent_buffer *eb;

	hit = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (!hit || hit->start != bytenr || hit->size != blocksize)
		return NULL;

	eb = container_of(hit, struct extent_buffer, cache_node);
	list_move_tail(&eb->lru, &tree->lru);
	eb->refs++;
	return eb;
}

/*
 * Return the cached extent buffer that search_cache_extent() finds for
 * @start, or NULL if there is none.
 *
 * On a hit the buffer is moved to the tail of the LRU list and returned with
 * an extra reference.
 */
struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
					       u64 start)
{
	struct cache_extent *hit = search_cache_extent(&tree->cache, start);
	struct extent_buffer *eb;

	if (!hit)
		return NULL;

	eb = container_of(hit, struct extent_buffer, cache_node);
	list_move_tail(&eb->lru, &tree->lru);
	eb->refs++;
	return eb;
}

/*
 * Shrink the extent buffer cache of @tree.
 *
 * Walks the LRU list from its head and frees buffers that hold no reference,
 * stopping once the accounted cache size is at or below 90% of the limit (or
 * the list is exhausted).  Buffers still referenced are skipped.
 */
static void trim_extent_buffer_cache(struct extent_io_tree *tree)
{
	struct extent_buffer *cur, *next;

	list_for_each_entry_safe(cur, next, &tree->lru, lru) {
		if (!cur->refs)
			free_extent_buffer_final(cur);
		if (((tree->max_cache_size * 9) / 10) >= tree->cache_size)
			break;
	}
}

/*
 * Get the extent buffer for [@bytenr, @bytenr + @blocksize) from the cache
 * of @fs_info, allocating and caching a new one if needed.
 *
 * An exactly matching cached buffer is moved to the LRU tail and returned
 * with an extra reference.  If the lookup returns a buffer that does not
 * match the range exactly, one reference is dropped on it before a new buffer
 * is allocated.  A new buffer is inserted into the cache tree and LRU list,
 * accounted in the cache size, and the cache is trimmed when the size limit
 * is reached.
 *
 * Returns NULL if the allocation or the cache insertion fails.
 */
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 bytenr, u32 blocksize)
{
	struct extent_io_tree *tree = &fs_info->extent_cache;
	struct cache_extent *hit;
	struct extent_buffer *eb;

	hit = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (hit && hit->start == bytenr && hit->size == blocksize) {
		eb = container_of(hit, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
		return eb;
	}

	/* Found something for the range, but not the exact buffer. */
	if (hit) {
		eb = container_of(hit, struct extent_buffer, cache_node);
		free_extent_buffer(eb);
	}

	eb = __alloc_extent_buffer(fs_info, bytenr, blocksize);
	if (!eb)
		return NULL;

	if (insert_cache_extent(&tree->cache, &eb->cache_node)) {
		free(eb);
		return NULL;
	}

	list_add_tail(&eb->lru, &tree->lru);
	tree->cache_size += blocksize;
	if (tree->cache_size >= tree->max_cache_size)
		trim_extent_buffer_cache(tree);

	return eb;
}

/*
 * Allocate a dummy extent buffer which won't be inserted into the extent
 * buffer cache.
 *
 * This mostly allows super block read/write using the existing eb
 * infrastructure without polluting the eb cache.
 *
 * This is especially important to avoid injecting eb->start == SZ_64K, as a
 * fuzzed image could have an invalid tree bytenr covering the super block
 * range, and cause ref count underflow.
 *
 * Returns the new buffer flagged EXTENT_BUFFER_DUMMY, or NULL if the
 * allocation fails.
 */
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;

	eb = __alloc_extent_buffer(fs_info, bytenr, blocksize);
	if (eb)
		eb->flags |= EXTENT_BUFFER_DUMMY;

	return eb;
}

/*
 * Read @len bytes at @logical by rebuilding the data from a RAID5/6 full
 * stripe.
 *
 * @fs_info:  filesystem, only used for the zoned flag passed to btrfs_pread()
 * @buf:      destination for the rebuilt data
 * @logical:  logical address to read
 * @len:      number of bytes, must not exceed BTRFS_STRIPE_LEN
 * @mirror:   mirror number, must be > 1 (this path is for read repair only)
 * @multi:    stripe mapping of the full stripe, read from disk as is
 * @raid_map: raid map of the full stripe, raid_map[0] is its logical start
 *
 * Every stripe of the full stripe is read, the stripe containing @logical is
 * treated as failed and recovered with raid56_recov(), then the requested
 * bytes are copied into @buf.
 *
 * Returns 0 on success, -ENOMEM if a buffer cannot be allocated, or -EIO if
 * any stripe cannot be read completely.
 */
static int read_raid56(struct btrfs_fs_info *fs_info, void *buf, u64 logical,
		       u64 len, int mirror, struct btrfs_multi_bio *multi,
		       u64 *raid_map)
{
	const int num_stripes = multi->num_stripes;
	u64 full_stripe_start;
	void **pointers = NULL;
	int failed_a = -1;
	int failed_b = -1;
	int i;
	int ret;

	/* Only read repair should go this path */
	ASSERT(mirror > 1);
	ASSERT(raid_map);

	/* The read length should be inside one stripe */
	ASSERT(len <= BTRFS_STRIPE_LEN);

	/* Only touch @raid_map after it has been checked above. */
	full_stripe_start = raid_map[0];

	pointers = calloc(num_stripes, sizeof(void *));
	if (!pointers) {
		ret = -ENOMEM;
		goto out;
	}
	/* Allocate memory for the full stripe */
	for (i = 0; i < num_stripes; i++) {
		pointers[i] = malloc(BTRFS_STRIPE_LEN);
		if (!pointers[i]) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/*
	 * Read the full stripe.
	 *
	 * The stripes in @multi is not rotated, thus can be used to read from
	 * disk directly.
	 */
	for (i = 0; i < num_stripes; i++) {
		ret = btrfs_pread(multi->stripes[i].dev->fd, pointers[i],
				  BTRFS_STRIPE_LEN, multi->stripes[i].physical,
				  fs_info->zoned);
		if (ret < BTRFS_STRIPE_LEN) {
			ret = -EIO;
			goto out;
		}
	}

	/*
	 * Get the failed index.
	 *
	 * Since we're reading using mirror_num > 1 already, it means the data
	 * stripe where @logical lies in is definitely corrupted.
	 */
	failed_a = (logical - full_stripe_start) / BTRFS_STRIPE_LEN;

	/*
	 * For RAID6, we don't have good way to exhaust all the combinations,
	 * so here we can only go through the map to see if we have missing devices.
	 */
	if (multi->type & BTRFS_BLOCK_GROUP_RAID6) {
		for (i = 0; i < num_stripes; i++) {
			/* Skip failed_a, as it's already marked failed */
			if (i == failed_a)
				continue;
			/* Missing dev */
			if (multi->stripes[i].dev->fd == -1) {
				failed_b = i;
				break;
			}
		}
		/*
		 * No missing device, we have no better idea, default to P
		 * corruption
		 */
		if (failed_b < 0)
			failed_b = num_stripes - 2;
	}

	/* Rebuild the full stripe */
	ret = raid56_recov(num_stripes, BTRFS_STRIPE_LEN, multi->type,
			   failed_a, failed_b, pointers);
	ASSERT(ret == 0);

	/* Now copy the data back to original buf */
	memcpy(buf, pointers[failed_a] + (logical - full_stripe_start) %
			BTRFS_STRIPE_LEN, len);
	ret = 0;
out:
	/*
	 * @pointers is NULL when its own allocation failed, so it must not be
	 * indexed in that case.  Slots that were never filled are NULL thanks
	 * to calloc(), and free(NULL) is a no-op.
	 */
	if (pointers) {
		for (i = 0; i < num_stripes; i++)
			free(pointers[i]);
		free(pointers);
	}
	return ret;
}

/*
 * Read data at logical address @logical into @buf.
 *
 * @len is in/out: on entry the number of bytes wanted, on success the number
 * of bytes actually read, which may be smaller because the mapping returned
 * by btrfs_map_block() can limit the length.
 *
 * For RAID5/6 block groups with @mirror > 1 the data is rebuilt through
 * read_raid56(); in that case @len is updated even if the rebuild fails.
 * Otherwise the data is read from the first stripe of the mapping.
 *
 * Returns 0 on success, -EIO if the block cannot be mapped, the device has
 * no usable fd or the read is short, or the negative value returned by
 * btrfs_pread() / read_raid56().
 */
int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 logical,
			u64 *len, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *dev;
	u64 *raid_map = NULL;
	u64 mapped_len = *len;
	int ret;

	ret = btrfs_map_block(info, READ, logical, &mapped_len, &multi, mirror,
			      &raid_map);
	if (ret) {
		fprintf(stderr, "Couldn't map the block %llu\n", logical);
		return -EIO;
	}
	mapped_len = min(*len, mapped_len);

	/* We need to rebuild from P/Q */
	if (mirror > 1 && (multi->type & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
		ret = read_raid56(info, buf, logical, mapped_len, mirror,
				  multi, raid_map);
		free(multi);
		free(raid_map);
		*len = mapped_len;
		return ret;
	}

	/* Plain read: the raid map is not needed, use the first stripe. */
	free(raid_map);
	dev = multi->stripes[0].dev;
	if (dev->fd <= 0) {
		kfree(multi);
		return -EIO;
	}

	ret = btrfs_pread(dev->fd, buf, mapped_len,
			  multi->stripes[0].physical, info->zoned);
	kfree(multi);

	if (ret < 0) {
		fprintf(stderr, "Error reading %llu, %d\n", logical,
			ret);
		return ret;
	}
	if (ret != mapped_len) {
		fprintf(stderr,
			"Short read for %llu, read %d, read_len %llu\n",
			logical, ret, mapped_len);
		return -EIO;
	}

	*len = mapped_len;
	return 0;
}

/*
 * Write the data in @buf to logical bytenr @offset.
 *
 * Such data will be written to all mirrors and RAID56 P/Q will also be
 * properly handled.
 *
 * The range is processed in chunks as mapped by btrfs_map_block().  When the
 * mapping comes with a raid map, the chunk is additionally capped to the
 * node size and written through write_raid56_with_parity(); otherwise the
 * chunk is written to every stripe of the mapping.
 *
 * Returns 0 on success or a negative errno value: -EIO if a block cannot be
 * mapped, a device has no usable fd or a write is short, -ENOMEM if the
 * temporary buffer cannot be allocated, or the negated errno of a failed
 * write.  Chunks written before the failure are not rolled back.
 */
int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
		       u64 bytes)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 this_len;
	u64 total_write = 0;
	u64 *raid_map = NULL;
	u64 dev_bytenr;
	int dev_nr;
	int ret = 0;

	while (bytes_left > 0) {
		this_len = bytes_left;
		dev_nr = 0;

		ret = btrfs_map_block(info, WRITE, offset, &this_len, &multi,
				      0, &raid_map);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %llu\n",
				offset);
			return -EIO;
		}

		if (raid_map) {
			struct extent_buffer *eb;
			u64 stripe_len = this_len;

			this_len = min(this_len, bytes_left);
			this_len = min(this_len, (u64)info->nodesize);

			eb = malloc(sizeof(struct extent_buffer) + this_len);
			if (!eb) {
				fprintf(stderr, "cannot allocate memory for eb\n");
				ret = -ENOMEM;
				goto out;
			}

			/* Temporary buffer that only carries range and data. */
			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
			eb->start = offset;
			eb->len = this_len;

			memcpy(eb->data, buf + total_write, this_len);
			ret = write_raid56_with_parity(info, eb, multi,
						       stripe_len, raid_map);
			BUG_ON(ret < 0);

			free(eb);
			kfree(raid_map);
			raid_map = NULL;
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd <= 0) {
				ret = -EIO;
				goto out;
			}

			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			dev_nr++;
			device->total_ios++;

			ret = btrfs_pwrite(device->fd, buf + total_write,
					   this_len, dev_bytenr, info->zoned);
			if (ret != this_len) {
				if (ret < 0) {
					/*
					 * Save errno before fprintf() can
					 * clobber it, and hand it back as a
					 * negative value like every other
					 * failure of this function.
					 */
					const int saved_errno = errno;

					fprintf(stderr, "Error writing to "
						"device %d\n", saved_errno);
					ret = saved_errno ? -saved_errno : -EIO;
				} else {
					fprintf(stderr, "Short write\n");
					ret = -EIO;
				}
				goto out;
			}
		}

		BUG_ON(bytes_left < this_len);

		bytes_left -= this_len;
		offset += this_len;
		total_write += this_len;

		kfree(multi);
		multi = NULL;
	}
	return 0;

out:
	/* Both may be NULL here, depending on which path failed. */
	kfree(multi);
	kfree(raid_map);
	return ret;
}

/*
 * Mark @eb dirty, unless it already is.
 *
 * The buffer gets the EXTENT_DIRTY flag, its byte range is marked dirty in
 * the extent cache tree of its filesystem, and an extra reference is taken
 * on it (released again by clear_extent_buffer_dirty()).
 *
 * Always returns 0.
 */
int set_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree;

	if (eb->flags & EXTENT_DIRTY)
		return 0;

	tree = &eb->fs_info->extent_cache;
	eb->flags |= EXTENT_DIRTY;
	set_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
	extent_buffer_get(eb);
	return 0;
}

/*
 * Clear the dirty state of @eb, if it is dirty.
 *
 * The EXTENT_DIRTY flag is removed, the byte range of the buffer is marked
 * clean in the extent cache tree of its filesystem, and one reference is
 * dropped with free_extent_buffer().
 *
 * Always returns 0.
 */
int clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree;

	if (!(eb->flags & EXTENT_DIRTY))
		return 0;

	tree = &eb->fs_info->extent_cache;
	eb->flags &= ~EXTENT_DIRTY;
	clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
	free_extent_buffer(eb);
	return 0;
}

/*
 * Compare @len bytes of @eb's data, beginning at byte offset @start, with
 * the memory at @ptrv.  Returns the memcmp() result.  No bounds checking
 * against the buffer length is done here.
 */
int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	return memcmp(eb->data + start, ptrv, len);
}

/*
 * Copy @len bytes out of @eb's data, beginning at byte offset @start, into
 * @dst.  No bounds checking against the buffer length is done here.
 */
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len)
{
	memcpy(dst, eb->data + start, len);
}

/*
 * Copy @len bytes from @src into @eb's data at byte offset @start.  No
 * bounds checking against the buffer length is done here, and the buffer is
 * not marked dirty.
 */
void write_extent_buffer(struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len)
{
	memcpy(eb->data + start, src, len);
}

/*
 * Copy @len bytes from @src's data at @src_offset into @dst's data at
 * @dst_offset.  memcpy() is used, so the two regions must not overlap; use
 * memmove_extent_buffer() for moves within one buffer.  No bounds checking
 * is done here.
 */
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	memcpy(dst->data + dst_offset, src->data + src_offset, len);
}

/*
 * Move @len bytes inside @dst's data from @src_offset to @dst_offset.
 * memmove() is used, so the regions may overlap.  No bounds checking is done
 * here.
 */
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	memmove(dst->data + dst_offset, dst->data + src_offset, len);
}

/*
 * Fill @len bytes of @eb's data, beginning at byte offset @start, with the
 * byte value @c.  No bounds checking against the buffer length is done here.
 */
void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	memset(eb->data + start, c, len);
}

/*
 * Test bit number @nr of the bitmap that begins at byte offset @start inside
 * @eb's data.  Returns whatever le_test_bit() returns for that bit.
 */
int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
			   unsigned long nr)
{
	return le_test_bit(nr, (u8 *)eb->data + start);
}