mirror of
https://github.com/kdave/btrfs-progs
synced 2025-01-11 16:29:42 +00:00
btrfs-progs: fi du: Calculate space shared by each directory argument's file set
Here we define each file set as those found by a recursive search of a single directory argument to btrfs fi du. This isn't as simple as adding up shared extents - they may be shared with each other, and may also overlap. This patch uses an interval tree to store shared extents we find while fiemapping files. After collecting them, a 'set shared' count is calculated by summing (without overlap) each shared region discovered. This is then displayed to the user as 'set shared'. Signed-off-by: Mark Fasheh <mfasheh@suse.de> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
a2e7ff1eae
commit
bc23667bd2
272
cmds-fi-du.c
272
cmds-fi-du.c
@ -34,12 +34,162 @@
|
||||
#include "kerncompat.h"
|
||||
#include "rbtree.h"
|
||||
|
||||
#include "interval_tree_generic.h"
|
||||
|
||||
static int summarize = 0;
|
||||
static unsigned unit_mode = UNITS_RAW;
|
||||
static char path[PATH_MAX] = { 0, };
|
||||
static char *pathp = path;
|
||||
static char *path_max = &path[PATH_MAX - 1];
|
||||
|
||||
struct shared_extent {
|
||||
struct rb_node rb;
|
||||
u64 start; /* Start of interval */
|
||||
u64 last; /* Last location _in_ interval */
|
||||
u64 __subtree_last;
|
||||
};
|
||||
|
||||
/*
|
||||
* extent_tree_* functions are defined in the massive interval tree
|
||||
* macro below. This serves to illustrate the api in human-readable
|
||||
* terms.
|
||||
*/
|
||||
static void
|
||||
extent_tree_insert(struct shared_extent *node, struct rb_root *root);
|
||||
|
||||
static void
|
||||
extent_tree_remove(struct shared_extent *node, struct rb_root *root);
|
||||
|
||||
static struct shared_extent *
|
||||
extent_tree_iter_first(struct rb_root *root,
|
||||
u64 start, u64 last);
|
||||
|
||||
static struct shared_extent *
|
||||
extent_tree_iter_next(struct shared_extent *node,
|
||||
u64 start, u64 last);
|
||||
|
||||
#define START(node) ((node)->start)
|
||||
#define LAST(node) ((node)->last)
|
||||
|
||||
INTERVAL_TREE_DEFINE(struct shared_extent, rb,
|
||||
u64, __subtree_last,
|
||||
START, LAST, static, extent_tree)
|
||||
|
||||
/*
 * Allocate a shared_extent describing the closed range
 * [start, start + len - 1] and insert it into the interval tree at 'root'.
 *
 * 'len' must not be zero (checked with BUG_ON).
 *
 * Returns 0 on success, or the positive errno value ENOMEM if the node
 * could not be allocated.
 */
static int add_shared_extent(u64 start, u64 len, struct rb_root *root)
{
	struct shared_extent *extent;

	BUG_ON(len == 0);

	extent = calloc(1, sizeof(*extent));
	if (extent == NULL)
		return ENOMEM;

	/* 'last' is inclusive, hence the - 1 */
	extent->last = start + len - 1;
	extent->start = start;

	extent_tree_insert(extent, root);
	return 0;
}
|
||||
|
||||
/*
 * Remove and free every shared_extent stored in the tree at 'root'.
 * A NULL 'root' is accepted and ignored.
 */
static void cleanup_shared_extents(struct rb_root *root)
{
	struct shared_extent *cur;
	struct shared_extent *next;

	if (root == NULL)
		return;

	/* The window [0, -1ULL] spans the whole u64 range, i.e. every node. */
	for (cur = extent_tree_iter_first(root, 0, -1ULL); cur != NULL;
	     cur = next) {
		/* Fetch the successor first: 'cur' is freed below. */
		next = extent_tree_iter_next(cur, 0, -1ULL);

		extent_tree_remove(cur, root);
		free(cur);
	}
}
|
||||
|
||||
#define dprintf(...)
|
||||
|
||||
/*
|
||||
* Find all extents which overlap 'n', calculate the space
|
||||
* covered by them and remove those nodes from the tree.
|
||||
*/
|
||||
static u64 count_unique_bytes(struct rb_root *root, struct shared_extent *n)
|
||||
{
|
||||
struct shared_extent *tmp;
|
||||
u64 wstart = n->start;
|
||||
u64 wlast = n->last;
|
||||
|
||||
dprintf("Count overlaps:");
|
||||
|
||||
do {
|
||||
/*
|
||||
* Expand our search window based on the lastest
|
||||
* overlapping extent. Doing this will allow us to
|
||||
* find all possible overlaps
|
||||
*/
|
||||
if (wstart > n->start)
|
||||
wstart = n->start;
|
||||
if (wlast < n->last)
|
||||
wlast = n->last;
|
||||
|
||||
dprintf(" (%llu, %llu)", n->start, n->last);
|
||||
|
||||
tmp = n;
|
||||
n = extent_tree_iter_next(n, wstart, wlast);
|
||||
|
||||
extent_tree_remove(tmp, root);
|
||||
free(tmp);
|
||||
} while (n);
|
||||
|
||||
dprintf("; wstart: %llu wlast: %llu total: %llu\n", wstart,
|
||||
wlast, wlast - wstart + 1);
|
||||
|
||||
return wlast - wstart + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* What we want to do here is get a count of shared bytes within the
|
||||
* set of extents we have collected. Specifcally, we don't want to
|
||||
* count any byte more than once, so just adding them up doesn't
|
||||
* work.
|
||||
*
|
||||
* For each set of overlapping extents we find the lowest start and
|
||||
* highest end. From there we have the actual number of bytes which is
|
||||
* shared across all of the extents in our set. A sum of each sets
|
||||
* extent length is returned.
|
||||
*/
|
||||
static void count_shared_bytes(struct rb_root *root, u64 *ret_cnt)
|
||||
{
|
||||
u64 count = 0;
|
||||
struct shared_extent *s = extent_tree_iter_first(root,
|
||||
0, -1ULL);
|
||||
|
||||
if (!s)
|
||||
goto out;
|
||||
|
||||
while (s) {
|
||||
/*
|
||||
* Find all extents which overlap 's', calculate the space
|
||||
* covered by them and remove those nodes from the tree.
|
||||
*/
|
||||
count += count_unique_bytes(root, s);
|
||||
|
||||
/*
|
||||
* Since count_unique_bytes will be emptying the tree,
|
||||
* we can grab the first node here
|
||||
*/
|
||||
s = extent_tree_iter_first(root, 0, -1ULL);
|
||||
}
|
||||
|
||||
BUG_ON(!RB_EMPTY_ROOT(root));
|
||||
out:
|
||||
*ret_cnt = count;
|
||||
}
|
||||
|
||||
/* Track which inodes we've seen for the purposes of hardlink detection. */
|
||||
struct seen_inode {
|
||||
struct rb_node i_node;
|
||||
@ -112,7 +262,21 @@ static int inode_seen(u64 ino, u64 subvol)
|
||||
return EEXIST;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void clear_seen_inodes(void)
|
||||
{
|
||||
struct rb_node *n = rb_first(&seen_inodes);
|
||||
struct seen_inode *si;
|
||||
|
||||
while (n) {
|
||||
si = rb_entry(n, struct seen_inode, i_node);
|
||||
|
||||
rb_erase(&si->i_node, &seen_inodes);
|
||||
free(si);
|
||||
|
||||
n = rb_first(&seen_inodes);
|
||||
}
|
||||
}
|
||||
|
||||
const char * const cmd_filesystem_du_usage[] = {
|
||||
@ -130,7 +294,7 @@ const char * const cmd_filesystem_du_usage[] = {
|
||||
* space they will use yet.
|
||||
*/
|
||||
#define SKIP_FLAGS (FIEMAP_EXTENT_UNKNOWN|FIEMAP_EXTENT_DELALLOC|FIEMAP_EXTENT_DATA_INLINE)
|
||||
static int du_calc_file_space(int dirfd, const char *filename,
|
||||
static int du_calc_file_space(int fd, struct rb_root *shared_extents,
|
||||
uint64_t *ret_total, uint64_t *ret_shared)
|
||||
{
|
||||
char buf[16384];
|
||||
@ -142,28 +306,19 @@ static int du_calc_file_space(int dirfd, const char *filename,
|
||||
int last = 0;
|
||||
int rc;
|
||||
u64 ext_len;
|
||||
int fd;
|
||||
u64 file_total = 0;
|
||||
u64 file_shared = 0;
|
||||
u32 flags;
|
||||
|
||||
memset(fiemap, 0, sizeof(struct fiemap));
|
||||
|
||||
fd = openat(dirfd, filename, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
ret = errno;
|
||||
fprintf(stderr, "ERROR: can't access '%s': %s\n",
|
||||
filename, strerror(ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
do {
|
||||
fiemap->fm_length = ~0ULL;
|
||||
fiemap->fm_extent_count = count;
|
||||
rc = ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap);
|
||||
if (rc < 0) {
|
||||
ret = errno;
|
||||
goto out_close;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* If 0 extents are returned, then more ioctls are not needed */
|
||||
@ -181,8 +336,17 @@ static int du_calc_file_space(int dirfd, const char *filename,
|
||||
continue;
|
||||
|
||||
file_total += ext_len;
|
||||
if (flags & FIEMAP_EXTENT_SHARED)
|
||||
if (flags & FIEMAP_EXTENT_SHARED) {
|
||||
file_shared += ext_len;
|
||||
|
||||
if (shared_extents) {
|
||||
ret = add_shared_extent(fm_ext[i].fe_physical,
|
||||
ext_len,
|
||||
shared_extents);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fiemap->fm_start = (fm_ext[i - 1].fe_logical +
|
||||
@ -193,28 +357,27 @@ static int du_calc_file_space(int dirfd, const char *filename,
|
||||
*ret_shared = file_shared;
|
||||
|
||||
ret = 0;
|
||||
out_close:
|
||||
close(fd);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct du_dir_ctxt {
|
||||
uint64_t bytes_total;
|
||||
uint64_t bytes_shared;
|
||||
DIR *dirstream;
|
||||
struct rb_root shared_extents;
|
||||
};
|
||||
#define INIT_DU_DIR_CTXT (struct du_dir_ctxt) { 0ULL, 0ULL, NULL, RB_ROOT }
|
||||
|
||||
static int du_add_file(const char *filename, int dirfd, uint64_t *ret_total,
|
||||
static int du_add_file(const char *filename, int dirfd,
|
||||
struct rb_root *shared_extents, uint64_t *ret_total,
|
||||
uint64_t *ret_shared, int top_level);
|
||||
|
||||
static int du_walk_dir(struct du_dir_ctxt *ctxt)
|
||||
static int du_walk_dir(struct du_dir_ctxt *ctxt, struct rb_root *shared_extents)
|
||||
{
|
||||
int fd, ret, type;
|
||||
DIR *dirstream = NULL;
|
||||
int ret, type;
|
||||
struct dirent *entry;
|
||||
|
||||
fd = open_file_or_dir(path, &dirstream);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
DIR *dirstream = ctxt->dirstream;
|
||||
|
||||
ret = 0;
|
||||
do {
|
||||
@ -232,8 +395,9 @@ static int du_walk_dir(struct du_dir_ctxt *ctxt)
|
||||
tot = shr = 0;
|
||||
|
||||
ret = du_add_file(entry->d_name,
|
||||
dirfd(dirstream), &tot,
|
||||
&shr, 0);
|
||||
dirfd(dirstream),
|
||||
shared_extents, &tot, &shr,
|
||||
0);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
@ -243,19 +407,21 @@ static int du_walk_dir(struct du_dir_ctxt *ctxt)
|
||||
}
|
||||
} while (entry != NULL);
|
||||
|
||||
close_file_or_dir(fd, dirstream);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int du_add_file(const char *filename, int dirfd, uint64_t *ret_total,
|
||||
static int du_add_file(const char *filename, int dirfd,
|
||||
struct rb_root *shared_extents, uint64_t *ret_total,
|
||||
uint64_t *ret_shared, int top_level)
|
||||
{
|
||||
int ret, len = strlen(filename);
|
||||
char *pathtmp;
|
||||
struct stat st;
|
||||
struct du_dir_ctxt dir;
|
||||
struct du_dir_ctxt dir = INIT_DU_DIR_CTXT;
|
||||
int is_dir = 0;
|
||||
uint64_t file_total = 0;
|
||||
uint64_t file_shared = 0;
|
||||
u64 dir_set_shared = 0;
|
||||
u64 subvol;
|
||||
int fd;
|
||||
DIR *dirstream = NULL;
|
||||
@ -300,26 +466,57 @@ static int du_add_file(const char *filename, int dirfd, uint64_t *ret_total,
|
||||
goto out_close;
|
||||
|
||||
if (S_ISREG(st.st_mode)) {
|
||||
ret = du_calc_file_space(dirfd, filename, &file_total,
|
||||
ret = du_calc_file_space(fd, shared_extents, &file_total,
|
||||
&file_shared);
|
||||
if (ret)
|
||||
goto out_close;
|
||||
} else if (S_ISDIR(st.st_mode)) {
|
||||
memset(&dir, 0, sizeof(dir));
|
||||
struct rb_root *root = shared_extents;
|
||||
|
||||
ret = du_walk_dir(&dir);
|
||||
/*
|
||||
* We collect shared extents in an rb_root, the top
|
||||
* level caller will not pass a root down, so use the
|
||||
* one on our dir context.
|
||||
*/
|
||||
if (top_level)
|
||||
root = &dir.shared_extents;
|
||||
|
||||
is_dir = 1;
|
||||
|
||||
dir.dirstream = dirstream;
|
||||
ret = du_walk_dir(&dir, root);
|
||||
*pathp = '\0';
|
||||
if (ret)
|
||||
if (ret) {
|
||||
if (top_level)
|
||||
cleanup_shared_extents(root);
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
file_total = dir.bytes_total;
|
||||
file_shared = dir.bytes_shared;
|
||||
if (top_level)
|
||||
count_shared_bytes(root, &dir_set_shared);
|
||||
}
|
||||
|
||||
if (!summarize || top_level) {
|
||||
printf("%s\t%s\t%s\n", pretty_size_mode(file_total, unit_mode),
|
||||
pretty_size_mode((file_total - file_shared), unit_mode),
|
||||
path);
|
||||
u64 excl = file_total - file_shared;
|
||||
|
||||
if (top_level) {
|
||||
u64 set_shared = file_shared;
|
||||
|
||||
if (is_dir)
|
||||
set_shared = dir_set_shared;
|
||||
|
||||
printf("%s\t%s\t%s\t%s\n",
|
||||
pretty_size_mode(file_total, unit_mode),
|
||||
pretty_size_mode(excl, unit_mode),
|
||||
pretty_size_mode(set_shared, unit_mode),
|
||||
path);
|
||||
} else {
|
||||
printf("%s\t%s\t\t\t%s\n",
|
||||
pretty_size_mode(file_total, unit_mode),
|
||||
pretty_size_mode(excl, unit_mode), path);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret_total)
|
||||
@ -369,15 +566,18 @@ int cmd_filesystem_du(int argc, char **argv)
|
||||
if (check_argc_min(argc - optind, 1))
|
||||
usage(cmd_filesystem_du_usage);
|
||||
|
||||
printf("total\texclusive\tfilename\n");
|
||||
printf("total\texclusive\tset shared\tfilename\n");
|
||||
|
||||
for (i = optind; i < argc; i++) {
|
||||
ret = du_add_file(argv[i], AT_FDCWD, NULL, NULL, 1);
|
||||
ret = du_add_file(argv[i], AT_FDCWD, NULL, NULL, NULL, 1);
|
||||
if (ret) {
|
||||
fprintf(stderr, "ERROR: can't check space of '%s': %s\n",
|
||||
argv[i], strerror(ret));
|
||||
error = 1;
|
||||
}
|
||||
|
||||
/* reset hard-link detection for each argument */
|
||||
clear_seen_inodes();
|
||||
}
|
||||
|
||||
return error;
|
||||
|
Loading…
Reference in New Issue
Block a user