537 lines
15 KiB
C
537 lines
15 KiB
C
/*
|
|
* Copyright (C) 2011 Red Hat. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License v2 as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#include "kerncompat.h"
|
|
#include <sys/time.h>
|
|
#include <stdlib.h>
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
#include <stdbool.h>
|
|
#include <unistd.h>
|
|
#include "kernel-lib/rbtree.h"
|
|
#include "kernel-lib/rbtree_types.h"
|
|
#include "kernel-shared/accessors.h"
|
|
#include "kernel-shared/uapi/btrfs_tree.h"
|
|
#include "kernel-shared/ctree.h"
|
|
#include "kernel-shared/disk-io.h"
|
|
#include "kernel-shared/extent_io.h"
|
|
#include "kernel-shared/file-item.h"
|
|
#include "kernel-shared/tree-checker.h"
|
|
#include "common/help.h"
|
|
#include "common/messages.h"
|
|
#include "common/open-utils.h"
|
|
#include "common/string-utils.h"
|
|
#include "common/units.h"
|
|
#include "cmds/commands.h"
|
|
|
|
/*
 * Verbosity counter, incremented once for every -v parsed by
 * cmd_inspect_tree_stats(). Nothing in the visible code reads it back.
 */
static int verbose = 0;
|
|
|
|
struct seek {
|
|
u64 distance;
|
|
u64 count;
|
|
struct rb_node n;
|
|
};
|
|
|
|
struct root_stats {
|
|
u64 total_nodes;
|
|
u64 total_bytes;
|
|
u64 total_inline;
|
|
u64 total_seeks;
|
|
u64 forward_seeks;
|
|
u64 backward_seeks;
|
|
u64 total_seek_len;
|
|
u64 max_seek_len;
|
|
u64 total_clusters;
|
|
u64 total_cluster_size;
|
|
u64 min_cluster_size;
|
|
u64 max_cluster_size;
|
|
u64 lowest_bytenr;
|
|
u64 highest_bytenr;
|
|
u64 node_counts[BTRFS_MAX_LEVEL];
|
|
struct rb_root seek_root;
|
|
int total_levels;
|
|
};
|
|
|
|
/*
 * Record one seek of length @dist in the histogram tree @root.
 *
 * If an entry for @dist already exists its counter is bumped, otherwise a
 * new entry with a count of 1 is allocated and inserted.
 *
 * Returns 0 on success or -ENOMEM if the new entry cannot be allocated.
 */
static int add_seek(struct rb_root *root, u64 dist)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;
	struct seek *entry;

	/* Standard rbtree descent, remembering where a new node would hang. */
	while (*link != NULL) {
		parent = *link;
		entry = rb_entry(parent, struct seek, n);

		if (dist == entry->distance) {
			entry->count++;
			return 0;
		}

		if (dist < entry->distance)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	entry = malloc(sizeof(*entry));
	if (entry == NULL)
		return -ENOMEM;

	entry->distance = dist;
	entry->count = 1;
	rb_link_node(&entry->n, parent, link);
	rb_insert_color(&entry->n, root);

	return 0;
}
|
|
|
|
static int walk_leaf(struct btrfs_root *root, struct btrfs_path *path,
|
|
struct root_stats *stat, int find_inline)
|
|
{
|
|
struct extent_buffer *b = path->nodes[0];
|
|
struct btrfs_file_extent_item *fi;
|
|
struct btrfs_key found_key;
|
|
int i;
|
|
|
|
stat->total_bytes += root->fs_info->nodesize;
|
|
stat->total_nodes++;
|
|
stat->node_counts[0]++;
|
|
|
|
if (!find_inline)
|
|
return 0;
|
|
|
|
for (i = 0; i < btrfs_header_nritems(b); i++) {
|
|
btrfs_item_key_to_cpu(b, &found_key, i);
|
|
if (found_key.type != BTRFS_EXTENT_DATA_KEY)
|
|
continue;
|
|
|
|
fi = btrfs_item_ptr(b, i, struct btrfs_file_extent_item);
|
|
if (btrfs_file_extent_type(b, fi) == BTRFS_FILE_EXTENT_INLINE)
|
|
stat->total_inline +=
|
|
btrfs_file_extent_inline_item_len(b, i);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Absolute difference between two block addresses. */
static u64 calc_distance(u64 block1, u64 block2)
{
	return (block1 < block2) ? block2 - block1 : block1 - block2;
}
|
|
|
|
static int walk_nodes(struct btrfs_root *root, struct btrfs_path *path,
|
|
struct root_stats *stat, int level, int find_inline)
|
|
{
|
|
struct extent_buffer *b = path->nodes[level];
|
|
u32 nodesize = root->fs_info->nodesize;
|
|
u64 last_block;
|
|
u64 cluster_size = nodesize;
|
|
int i;
|
|
int ret = 0;
|
|
|
|
stat->total_bytes += nodesize;
|
|
stat->total_nodes++;
|
|
stat->node_counts[level]++;
|
|
|
|
last_block = btrfs_header_bytenr(b);
|
|
for (i = 0; i < btrfs_header_nritems(b); i++) {
|
|
struct extent_buffer *tmp = NULL;
|
|
u64 cur_blocknr = btrfs_node_blockptr(b, i);
|
|
|
|
path->slots[level] = i;
|
|
if ((level - 1) > 0 || find_inline) {
|
|
struct btrfs_tree_parent_check check = {
|
|
.owner_root = btrfs_header_owner(b),
|
|
.transid = btrfs_node_ptr_generation(b, i),
|
|
.level = level - 1,
|
|
};
|
|
tmp = read_tree_block(root->fs_info, cur_blocknr, &check);
|
|
if (!extent_buffer_uptodate(tmp)) {
|
|
error("failed to read blocknr %llu",
|
|
btrfs_node_blockptr(b, i));
|
|
continue;
|
|
}
|
|
path->nodes[level - 1] = tmp;
|
|
}
|
|
if (level - 1)
|
|
ret = walk_nodes(root, path, stat, level - 1,
|
|
find_inline);
|
|
else
|
|
ret = walk_leaf(root, path, stat, find_inline);
|
|
if (last_block + nodesize != cur_blocknr) {
|
|
u64 distance = calc_distance(last_block +
|
|
nodesize,
|
|
cur_blocknr);
|
|
stat->total_seeks++;
|
|
stat->total_seek_len += distance;
|
|
if (stat->max_seek_len < distance)
|
|
stat->max_seek_len = distance;
|
|
if (add_seek(&stat->seek_root, distance)) {
|
|
error("cannot add new seek at distance %llu", distance);
|
|
ret = -ENOMEM;
|
|
break;
|
|
}
|
|
|
|
if (last_block < cur_blocknr)
|
|
stat->forward_seeks++;
|
|
else
|
|
stat->backward_seeks++;
|
|
if (cluster_size != nodesize) {
|
|
stat->total_cluster_size += cluster_size;
|
|
stat->total_clusters++;
|
|
if (cluster_size < stat->min_cluster_size)
|
|
stat->min_cluster_size = cluster_size;
|
|
if (cluster_size > stat->max_cluster_size)
|
|
stat->max_cluster_size = cluster_size;
|
|
}
|
|
cluster_size = nodesize;
|
|
} else {
|
|
cluster_size += nodesize;
|
|
}
|
|
last_block = cur_blocknr;
|
|
if (cur_blocknr < stat->lowest_bytenr)
|
|
stat->lowest_bytenr = cur_blocknr;
|
|
if (cur_blocknr > stat->highest_bytenr)
|
|
stat->highest_bytenr = cur_blocknr;
|
|
free_extent_buffer(tmp);
|
|
if (ret) {
|
|
error("walking down path failed: %d", ret);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void print_seek_histogram(struct root_stats *stat)
|
|
{
|
|
struct rb_node *n = rb_first(&stat->seek_root);
|
|
struct seek *seek;
|
|
u64 tick_interval;
|
|
u64 group_start = 0;
|
|
u64 group_count = 0;
|
|
u64 group_end = 0;
|
|
u64 i;
|
|
u64 max_seek = stat->max_seek_len;
|
|
int digits = 1;
|
|
|
|
if (stat->total_seeks < 20)
|
|
return;
|
|
|
|
while ((max_seek /= 10))
|
|
digits++;
|
|
|
|
/* Make a tick count as 5% of the total seeks */
|
|
tick_interval = stat->total_seeks / 20;
|
|
pr_verbose(LOG_DEFAULT, "\tSeek histogram\n");
|
|
for (; n; n = rb_next(n)) {
|
|
u64 ticks, gticks = 0;
|
|
|
|
seek = rb_entry(n, struct seek, n);
|
|
ticks = seek->count / tick_interval;
|
|
if (group_count)
|
|
gticks = group_count / tick_interval;
|
|
|
|
if (ticks <= 2 && gticks <= 2) {
|
|
if (group_count == 0)
|
|
group_start = seek->distance;
|
|
group_end = seek->distance;
|
|
group_count += seek->count;
|
|
continue;
|
|
}
|
|
|
|
if (group_count) {
|
|
|
|
gticks = group_count / tick_interval;
|
|
pr_verbose(LOG_DEFAULT, "\t\t%*llu - %*llu: %*llu ", digits, group_start,
|
|
digits, group_end, digits, group_count);
|
|
if (gticks) {
|
|
for (i = 0; i < gticks; i++)
|
|
pr_verbose(LOG_DEFAULT, "#");
|
|
pr_verbose(LOG_DEFAULT, "\n");
|
|
} else {
|
|
pr_verbose(LOG_DEFAULT, "|\n");
|
|
}
|
|
group_count = 0;
|
|
}
|
|
|
|
if (ticks <= 2)
|
|
continue;
|
|
|
|
pr_verbose(LOG_DEFAULT, "\t\t%*llu - %*llu: %*llu ", digits, seek->distance,
|
|
digits, seek->distance, digits, seek->count);
|
|
for (i = 0; i < ticks; i++)
|
|
pr_verbose(LOG_DEFAULT, "#");
|
|
pr_verbose(LOG_DEFAULT, "\n");
|
|
}
|
|
if (group_count) {
|
|
u64 gticks;
|
|
|
|
gticks = group_count / tick_interval;
|
|
pr_verbose(LOG_DEFAULT, "\t\t%*llu - %*llu: %*llu ", digits, group_start,
|
|
digits, group_end, digits, group_count);
|
|
if (gticks) {
|
|
for (i = 0; i < gticks; i++)
|
|
pr_verbose(LOG_DEFAULT, "#");
|
|
pr_verbose(LOG_DEFAULT, "\n");
|
|
} else {
|
|
pr_verbose(LOG_DEFAULT, "|\n");
|
|
}
|
|
group_count = 0;
|
|
}
|
|
}
|
|
|
|
/*
 * Store the difference @x - @y in @result.
 *
 * The subtrahend is first adjusted by whole seconds so that the microsecond
 * difference ends up non-negative and no larger than one second, then the
 * fields are subtracted individually. If @x is earlier than @y the resulting
 * tv_sec is negative.
 *
 * The adjustment is done on a private copy, so neither @x nor @y is
 * modified.
 */
static void timeval_subtract(struct timeval *result, struct timeval *x,
			     struct timeval *y)
{
	struct timeval sub = *y;

	/* Borrow seconds so that sub.tv_usec does not exceed x->tv_usec. */
	if (x->tv_usec < sub.tv_usec) {
		long nsec = (sub.tv_usec - x->tv_usec) / 1000000 + 1;

		sub.tv_usec -= 1000000 * nsec;
		sub.tv_sec += nsec;
	}

	/* Carry back any excess of more than one second. */
	if (x->tv_usec - sub.tv_usec > 1000000) {
		long nsec = (x->tv_usec - sub.tv_usec) / 1000000;

		sub.tv_usec += 1000000 * nsec;
		sub.tv_sec -= nsec;
	}

	result->tv_sec = x->tv_sec - sub.tv_sec;
	result->tv_usec = x->tv_usec - sub.tv_usec;
}
|
|
|
|
static int calc_root_size(struct btrfs_root *tree_root, struct btrfs_key *key,
|
|
int find_inline, unsigned int unit_mode)
|
|
{
|
|
struct btrfs_root *root;
|
|
struct btrfs_path path = { 0 };
|
|
struct rb_node *n;
|
|
struct timeval start, end, diff = {0};
|
|
struct root_stats stat;
|
|
int level;
|
|
int ret = 0;
|
|
int size_fail = 0;
|
|
int i;
|
|
|
|
root = btrfs_read_fs_root(tree_root->fs_info, key);
|
|
if (IS_ERR(root)) {
|
|
error("failed to read root %llu", key->objectid);
|
|
return 1;
|
|
}
|
|
|
|
memset(&stat, 0, sizeof(stat));
|
|
level = btrfs_header_level(root->node);
|
|
stat.lowest_bytenr = btrfs_header_bytenr(root->node);
|
|
stat.highest_bytenr = stat.lowest_bytenr;
|
|
stat.min_cluster_size = (u64)-1;
|
|
stat.max_cluster_size = root->fs_info->nodesize;
|
|
path.nodes[level] = root->node;
|
|
if (gettimeofday(&start, NULL)) {
|
|
error("cannot get time: %m");
|
|
goto out;
|
|
}
|
|
if (!level) {
|
|
ret = walk_leaf(root, &path, &stat, find_inline);
|
|
if (ret)
|
|
goto out;
|
|
goto out_print;
|
|
}
|
|
|
|
ret = walk_nodes(root, &path, &stat, level, find_inline);
|
|
if (ret)
|
|
goto out;
|
|
if (gettimeofday(&end, NULL)) {
|
|
error("cannot get time: %m");
|
|
goto out;
|
|
}
|
|
timeval_subtract(&diff, &end, &start);
|
|
out_print:
|
|
if (stat.min_cluster_size == (u64)-1) {
|
|
stat.min_cluster_size = 0;
|
|
stat.total_clusters = 1;
|
|
}
|
|
|
|
if (unit_mode == UNITS_RAW || size_fail) {
|
|
pr_verbose(LOG_DEFAULT, "\tTotal size: %llu\n", stat.total_bytes);
|
|
pr_verbose(LOG_DEFAULT, "\t\tInline data: %llu\n", stat.total_inline);
|
|
pr_verbose(LOG_DEFAULT, "\tTotal seeks: %llu\n", stat.total_seeks);
|
|
pr_verbose(LOG_DEFAULT, "\t\tForward seeks: %llu\n", stat.forward_seeks);
|
|
pr_verbose(LOG_DEFAULT, "\t\tBackward seeks: %llu\n", stat.backward_seeks);
|
|
pr_verbose(LOG_DEFAULT, "\t\tAvg seek len: %llu\n", stat.total_seeks ?
|
|
stat.total_seek_len / stat.total_seeks : 0);
|
|
print_seek_histogram(&stat);
|
|
pr_verbose(LOG_DEFAULT, "\tTotal clusters: %llu\n", stat.total_clusters);
|
|
pr_verbose(LOG_DEFAULT, "\t\tAvg cluster size: %llu\n", stat.total_cluster_size /
|
|
stat.total_clusters);
|
|
pr_verbose(LOG_DEFAULT, "\t\tMin cluster size: %llu\n", stat.min_cluster_size);
|
|
pr_verbose(LOG_DEFAULT, "\t\tMax cluster size: %llu\n", stat.max_cluster_size);
|
|
pr_verbose(LOG_DEFAULT, "\tTotal disk spread: %llu\n", stat.highest_bytenr -
|
|
stat.lowest_bytenr);
|
|
pr_verbose(LOG_DEFAULT, "\tTotal read time: %d s %d us\n", (int)diff.tv_sec,
|
|
(int)diff.tv_usec);
|
|
} else {
|
|
pr_verbose(LOG_DEFAULT, "\tTotal size: %s\n", pretty_size_mode(stat.total_bytes, unit_mode));
|
|
pr_verbose(LOG_DEFAULT, "\t\tInline data: %s\n", pretty_size_mode(stat.total_inline, unit_mode));
|
|
pr_verbose(LOG_DEFAULT, "\tTotal seeks: %llu\n", stat.total_seeks);
|
|
pr_verbose(LOG_DEFAULT, "\t\tForward seeks: %llu\n", stat.forward_seeks);
|
|
pr_verbose(LOG_DEFAULT, "\t\tBackward seeks: %llu\n", stat.backward_seeks);
|
|
pr_verbose(LOG_DEFAULT, "\t\tAvg seek len: %s\n", stat.total_seeks ?
|
|
pretty_size_mode(stat.total_seek_len / stat.total_seeks, unit_mode) :
|
|
pretty_size_mode(0, unit_mode));
|
|
print_seek_histogram(&stat);
|
|
pr_verbose(LOG_DEFAULT, "\tTotal clusters: %llu\n", stat.total_clusters);
|
|
pr_verbose(LOG_DEFAULT, "\t\tAvg cluster size: %s\n",
|
|
pretty_size_mode((stat.total_cluster_size /
|
|
stat.total_clusters), unit_mode));
|
|
pr_verbose(LOG_DEFAULT, "\t\tMin cluster size: %s\n",
|
|
pretty_size_mode(stat.min_cluster_size, unit_mode));
|
|
pr_verbose(LOG_DEFAULT, "\t\tMax cluster size: %s\n",
|
|
pretty_size_mode(stat.max_cluster_size, unit_mode));
|
|
pr_verbose(LOG_DEFAULT, "\tTotal disk spread: %s\n",
|
|
pretty_size_mode(stat.highest_bytenr - stat.lowest_bytenr, unit_mode));
|
|
pr_verbose(LOG_DEFAULT, "\tTotal read time: %d s %d us\n", (int)diff.tv_sec,
|
|
(int)diff.tv_usec);
|
|
}
|
|
pr_verbose(LOG_DEFAULT, "\tLevels: %d\n", level + 1);
|
|
pr_verbose(LOG_DEFAULT, "\tTotal nodes: %llu\n", stat.total_nodes);
|
|
for (i = 0; i < level + 1; i++) {
|
|
pr_verbose(LOG_DEFAULT, "\t\tOn level %d: %8llu", i, stat.node_counts[i]);
|
|
if (i > 0) {
|
|
u64 fanout;
|
|
|
|
fanout = stat.node_counts[i - 1];
|
|
fanout /= stat.node_counts[i];
|
|
pr_verbose(LOG_DEFAULT, " (avg fanout %llu)", fanout);
|
|
}
|
|
pr_verbose(LOG_DEFAULT, "\n");
|
|
}
|
|
out:
|
|
while ((n = rb_first(&stat.seek_root)) != NULL) {
|
|
struct seek *seek = rb_entry(n, struct seek, n);
|
|
rb_erase(n, &stat.seek_root);
|
|
free(seek);
|
|
}
|
|
|
|
/*
|
|
* We only use path to save node data in iterating, without holding
|
|
* eb's ref_cnt in path. Don't use btrfs_release_path() here, it will
|
|
* free these eb again, and cause many problems, as negative ref_cnt or
|
|
* invalid memory access.
|
|
*/
|
|
return ret;
|
|
}
|
|
|
|
static const char * const cmd_inspect_tree_stats_usage[] = {
|
|
"btrfs inspect-internal tree-stats [options] <device>",
|
|
"Print various stats for trees",
|
|
"",
|
|
OPTLINE("-b", "raw numbers in bytes"),
|
|
HELPINFO_UNITS_LONG,
|
|
OPTLINE("-t <rootid>", "print only tree with the given rootid"),
|
|
NULL
|
|
};
|
|
|
|
static int cmd_inspect_tree_stats(const struct cmd_struct *cmd,
|
|
int argc, char **argv)
|
|
{
|
|
struct btrfs_key key = { .type = BTRFS_ROOT_ITEM_KEY };
|
|
struct btrfs_root *root;
|
|
unsigned int unit_mode = UNITS_DEFAULT;
|
|
int opt;
|
|
int ret = 0;
|
|
u64 tree_id = 0;
|
|
|
|
unit_mode = get_unit_mode_from_arg(&argc, argv, 0);
|
|
|
|
optind = 0;
|
|
while ((opt = getopt(argc, argv, "vbt:")) != -1) {
|
|
switch (opt) {
|
|
case 'v':
|
|
verbose++;
|
|
break;
|
|
case 'b':
|
|
unit_mode = UNITS_RAW;
|
|
break;
|
|
case 't':
|
|
tree_id = arg_strtou64(optarg);
|
|
if (!tree_id) {
|
|
error("unrecognized tree id: %s", optarg);
|
|
exit(1);
|
|
}
|
|
break;
|
|
default:
|
|
usage_unknown_option(cmd, argv);
|
|
}
|
|
}
|
|
|
|
if (check_argc_exact(argc - optind, 1))
|
|
return 1;
|
|
|
|
ret = check_mounted(argv[optind]);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
warning("unable to check mount status of: %m");
|
|
} else if (ret) {
|
|
warning("%s already mounted, tree-stats accesses the block devices directly, this may\n"
|
|
"\tresult in inaccurate numbers, various errors or it may crash if the filesystem\n"
|
|
"\tchanges unexpectedly, restart if needed or remount read-only", argv[optind]);
|
|
}
|
|
|
|
root = open_ctree(argv[optind], 0, 0);
|
|
if (!root) {
|
|
error("cannot open ctree");
|
|
exit(1);
|
|
}
|
|
|
|
if (tree_id) {
|
|
pr_verbose(LOG_DEFAULT, "Calculating size of tree (%llu)\n", tree_id);
|
|
key.objectid = tree_id;
|
|
key.offset = (u64)-1;
|
|
ret = calc_root_size(root, &key, 1, unit_mode);
|
|
goto out;
|
|
}
|
|
|
|
pr_verbose(LOG_DEFAULT, "Calculating size of root tree\n");
|
|
key.objectid = BTRFS_ROOT_TREE_OBJECTID;
|
|
ret = calc_root_size(root, &key, 0, unit_mode);
|
|
if (ret)
|
|
goto out;
|
|
|
|
pr_verbose(LOG_DEFAULT, "Calculating size of extent tree\n");
|
|
key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
|
|
ret = calc_root_size(root, &key, 0, unit_mode);
|
|
if (ret)
|
|
goto out;
|
|
|
|
pr_verbose(LOG_DEFAULT, "Calculating size of csum tree\n");
|
|
key.objectid = BTRFS_CSUM_TREE_OBJECTID;
|
|
ret = calc_root_size(root, &key, 0, unit_mode);
|
|
if (ret)
|
|
goto out;
|
|
|
|
key.objectid = BTRFS_FS_TREE_OBJECTID;
|
|
key.offset = (u64)-1;
|
|
pr_verbose(LOG_DEFAULT, "Calculating size of fs tree\n");
|
|
ret = calc_root_size(root, &key, 1, unit_mode);
|
|
if (ret)
|
|
goto out;
|
|
out:
|
|
close_ctree(root);
|
|
return ret;
|
|
}
|
|
/*
 * Command definition for "tree-stats". The macro is defined outside this
 * file; presumably it ties the token to cmd_inspect_tree_stats() and
 * cmd_inspect_tree_stats_usage - confirm against cmds/commands.h.
 */
DEFINE_SIMPLE_COMMAND(inspect_tree_stats, "tree-stats");
|