mirror of
https://github.com/kdave/btrfs-progs
synced 2024-12-30 18:22:14 +00:00
2cdc8dddbf
[BUG] When running mkfs tests on a newly rebooted minimal system, mkfs/009 can fail. The reproduction steps require /tmp to contain only a minimal number of files in the first place. # mkdir /tmp/rootdir # xfs_io -f -c "pwrite 0 16k" /tmp/rootdir # mkfs.btrfs --rootdir /tmp/rootdir -f $dev # btrfs check $dev Opening filesystem to check... Checking filesystem on /dev/test/scratch1 UUID: 6821b3db-f056-4c18-b797-32679dcd4272 [1/7] checking root items [2/7] checking extents data backref 13631488 root 5 owner 170 offset 0 num_refs 0 not found in extent tree incorrect local backref count on 13631488 root 5 owner 170 offset 0 found 1 wanted 0 back 0x55ff6cd72260 backref 13631488 root 5 not referenced back 0x55ff6cd4c1f0 incorrect global backref count on 13631488 found 2 wanted 1 backpointer mismatch on [13631488 16384] ERROR: errors found in extent allocation tree or chunk allocation [CAUSE] The extent tree has the following weird item: item 0 key (13631488 EXTENT_ITEM 16384) itemoff 16250 itemsize 33 refs 1 gen 0 flags DATA tree block backref root FS_TREE This is an extent item for data, thus it should not have an inline tree backref. Then checking the fs tree: item 0 key (170 INODE_ITEM 0) itemoff 16123 itemsize 160 generation 7 transid 0 size 16384 nbytes 16384 block group 0 mode 100600 links 1 uid 1000 gid 1000 rdev 0 sequence 0 flags 0x0(none) atime 1664866393.0 (2022-10-04 14:53:13) ctime 1664863510.0 (2022-10-04 14:05:10) mtime 1664863455.0 (2022-10-04 14:04:15) otime 0.0 (1970-01-01 08:00:00) There is an inode item before the root dir inode, and that inode number 170 is causing the problem. In traverse_directory(), we use the inode number reported by stat() directly as the btrfs inode number, and pass it to btrfs_record_file_extent(), which finally calls btrfs_inc_extent_ref() with the above 170 passed as the @owner parameter. But inside btrfs_inc_extent_ref() we use that @owner value to determine whether it is a data backref.
Since we got a value smaller than BTRFS_FIRST_FREE_OBJECTID, btrfs treats it as a tree block, which causes the above problem. [FIX] As a quick fix, always add BTRFS_FIRST_FREE_OBJECTID to every inode number grabbed directly from stat(), and add an ASSERT() in __btrfs_record_file_extent() to catch an unexpected objectid. This is not a perfect solution, as the resulting fs will have a huge gap in its inode numbers: item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160 item 4 key (426 INODE_ITEM 0) itemoff 15883 itemsize 160 For a proper fix, we should allocate new btrfs inode numbers in sequential order, but that would be another series of patches. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
982 lines
24 KiB
C
982 lines
24 KiB
C
/*
|
|
* Copyright (C) 2017 SUSE. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License v2 as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
 * Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#include "kerncompat.h"
|
|
|
|
#include <sys/stat.h>
|
|
#include <sys/xattr.h>
|
|
#include <dirent.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <ftw.h>
|
|
#include <errno.h>
|
|
#include <limits.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "kernel-lib/sizes.h"
|
|
#include "kernel-shared/extent_io.h"
|
|
#include "kernel-shared/ctree.h"
|
|
#include "kernel-shared/volumes.h"
|
|
#include "kernel-shared/disk-io.h"
|
|
#include "kernel-shared/transaction.h"
|
|
#include "common/internal.h"
|
|
#include "common/messages.h"
|
|
#include "common/path-utils.h"
|
|
#include "mkfs/rootdir.h"
|
|
|
|
static u32 fs_block_size;
|
|
|
|
static u64 index_cnt = 2;
|
|
|
|
/*
|
|
* Size estimate will be done using the following data:
|
|
* 1) Number of inodes
|
|
* Since we will later shrink the fs, over-estimate is completely fine here
|
|
* as long as our estimate ensures we can populate the image without ENOSPC.
|
|
* So we only record how many inodes there are, and account the maximum
|
|
* space for each inode.
|
|
*
|
|
* 2) Data space for each (regular) inode
|
|
* To estimate data chunk size.
|
|
* Don't care if it can fit as an inline extent.
|
|
* Always round them up to sectorsize.
|
|
*/
|
|
static u64 ftw_meta_nr_inode;
|
|
static u64 ftw_data_size;
|
|
|
|
static int add_directory_items(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root, u64 objectid,
|
|
ino_t parent_inum, const char *name,
|
|
struct stat *st, int *dir_index_cnt)
|
|
{
|
|
int ret;
|
|
int name_len;
|
|
struct btrfs_key location;
|
|
u8 filetype = 0;
|
|
|
|
name_len = strlen(name);
|
|
|
|
location.objectid = objectid;
|
|
location.offset = 0;
|
|
location.type = BTRFS_INODE_ITEM_KEY;
|
|
|
|
if (S_ISDIR(st->st_mode))
|
|
filetype = BTRFS_FT_DIR;
|
|
if (S_ISREG(st->st_mode))
|
|
filetype = BTRFS_FT_REG_FILE;
|
|
if (S_ISLNK(st->st_mode))
|
|
filetype = BTRFS_FT_SYMLINK;
|
|
if (S_ISSOCK(st->st_mode))
|
|
filetype = BTRFS_FT_SOCK;
|
|
if (S_ISCHR(st->st_mode))
|
|
filetype = BTRFS_FT_CHRDEV;
|
|
if (S_ISBLK(st->st_mode))
|
|
filetype = BTRFS_FT_BLKDEV;
|
|
if (S_ISFIFO(st->st_mode))
|
|
filetype = BTRFS_FT_FIFO;
|
|
|
|
ret = btrfs_insert_dir_item(trans, root, name, name_len,
|
|
parent_inum, &location,
|
|
filetype, index_cnt);
|
|
if (ret)
|
|
return ret;
|
|
ret = btrfs_insert_inode_ref(trans, root, name, name_len,
|
|
objectid, parent_inum, index_cnt);
|
|
*dir_index_cnt = index_cnt;
|
|
index_cnt++;
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int fill_inode_item(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct btrfs_inode_item *dst, struct stat *src)
|
|
{
|
|
u64 blocks = 0;
|
|
u64 sectorsize = root->fs_info->sectorsize;
|
|
|
|
/*
|
|
* btrfs_inode_item has some reserved fields
|
|
* and represents on-disk inode entry, so
|
|
* zero everything to prevent information leak
|
|
*/
|
|
memset(dst, 0, sizeof(*dst));
|
|
|
|
btrfs_set_stack_inode_generation(dst, trans->transid);
|
|
btrfs_set_stack_inode_size(dst, src->st_size);
|
|
btrfs_set_stack_inode_nbytes(dst, 0);
|
|
btrfs_set_stack_inode_block_group(dst, 0);
|
|
btrfs_set_stack_inode_nlink(dst, src->st_nlink);
|
|
btrfs_set_stack_inode_uid(dst, src->st_uid);
|
|
btrfs_set_stack_inode_gid(dst, src->st_gid);
|
|
btrfs_set_stack_inode_mode(dst, src->st_mode);
|
|
btrfs_set_stack_inode_rdev(dst, 0);
|
|
btrfs_set_stack_inode_flags(dst, 0);
|
|
btrfs_set_stack_timespec_sec(&dst->atime, src->st_atime);
|
|
btrfs_set_stack_timespec_nsec(&dst->atime, 0);
|
|
btrfs_set_stack_timespec_sec(&dst->ctime, src->st_ctime);
|
|
btrfs_set_stack_timespec_nsec(&dst->ctime, 0);
|
|
btrfs_set_stack_timespec_sec(&dst->mtime, src->st_mtime);
|
|
btrfs_set_stack_timespec_nsec(&dst->mtime, 0);
|
|
btrfs_set_stack_timespec_sec(&dst->otime, 0);
|
|
btrfs_set_stack_timespec_nsec(&dst->otime, 0);
|
|
|
|
if (S_ISDIR(src->st_mode)) {
|
|
btrfs_set_stack_inode_size(dst, 0);
|
|
btrfs_set_stack_inode_nlink(dst, 1);
|
|
}
|
|
if (S_ISREG(src->st_mode)) {
|
|
btrfs_set_stack_inode_size(dst, (u64)src->st_size);
|
|
if (src->st_size <= BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info) &&
|
|
src->st_size < sectorsize)
|
|
btrfs_set_stack_inode_nbytes(dst, src->st_size);
|
|
else {
|
|
blocks = src->st_size / sectorsize;
|
|
if (src->st_size % sectorsize)
|
|
blocks += 1;
|
|
blocks *= sectorsize;
|
|
btrfs_set_stack_inode_nbytes(dst, blocks);
|
|
}
|
|
}
|
|
if (S_ISLNK(src->st_mode))
|
|
btrfs_set_stack_inode_nbytes(dst, src->st_size + 1);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * scandir() filter: returns 0 for the "." and ".." entries so they are
 * dropped, and 1 for every other name.
 */
static int directory_select(const struct dirent *entry)
{
	const char *name = entry->d_name;

	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return 0;

	return 1;
}
|
|
|
|
/*
 * Release a name list as filled in by scandir(): the first @count entries of
 * @files and then the array itself.
 *
 * A negative @count makes this a no-op and @files is not looked at.
 */
static void free_namelist(struct dirent **files, int count)
{
	if (count < 0)
		return;

	while (count-- > 0)
		free(files[count]);
	free(files);
}
|
|
|
|
static u64 calculate_dir_inode_size(const char *dirname)
|
|
{
|
|
int count, i;
|
|
struct dirent **files, *cur_file;
|
|
u64 dir_inode_size = 0;
|
|
|
|
count = scandir(dirname, &files, directory_select, NULL);
|
|
|
|
for (i = 0; i < count; i++) {
|
|
cur_file = files[i];
|
|
dir_inode_size += strlen(cur_file->d_name);
|
|
}
|
|
|
|
free_namelist(files, count);
|
|
|
|
dir_inode_size *= 2;
|
|
return dir_inode_size;
|
|
}
|
|
|
|
static int add_inode_items(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
struct stat *st, const char *name,
|
|
u64 self_objectid,
|
|
struct btrfs_inode_item *inode_ret)
|
|
{
|
|
int ret;
|
|
struct btrfs_inode_item btrfs_inode;
|
|
u64 objectid;
|
|
u64 inode_size = 0;
|
|
|
|
fill_inode_item(trans, root, &btrfs_inode, st);
|
|
objectid = self_objectid;
|
|
|
|
if (S_ISDIR(st->st_mode)) {
|
|
inode_size = calculate_dir_inode_size(name);
|
|
btrfs_set_stack_inode_size(&btrfs_inode, inode_size);
|
|
}
|
|
|
|
ret = btrfs_insert_inode(trans, root, objectid, &btrfs_inode);
|
|
|
|
*inode_ret = btrfs_inode;
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Copy all extended attributes of @file_name (symlinks are not followed) to
 * inode @objectid in @root.
 *
 * Returns 0 on success, also when the file has no xattrs or when the source
 * filesystem reports ENOTSUP for listing or reading them.  Any other failure
 * is logged and returned as a negative errno value.  The copy stops at the
 * first failure, so an insertion error can no longer be overwritten by a
 * later attribute that happens to succeed.
 */
static int add_xattr_item(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, u64 objectid,
			  const char *file_name)
{
	int ret;
	int cur_name_len;
	char xattr_list[XATTR_LIST_MAX];
	char *xattr_list_end;
	char *cur_name;
	char cur_value[XATTR_SIZE_MAX];

	ret = llistxattr(file_name, xattr_list, XATTR_LIST_MAX);
	if (ret < 0) {
		if (errno == ENOTSUP)
			return 0;
		/* Latch errno before error() gets a chance to change it. */
		ret = -errno;
		error("getting a list of xattr failed for %s: %m", file_name);
		return ret;
	}
	if (ret == 0)
		return 0;

	/* The list is a sequence of NUL terminated names, @ret bytes long. */
	xattr_list_end = xattr_list + ret;
	for (cur_name = xattr_list; cur_name < xattr_list_end;
	     cur_name += cur_name_len + 1) {
		cur_name_len = strlen(cur_name);

		ret = lgetxattr(file_name, cur_name, cur_value, XATTR_SIZE_MAX);
		if (ret < 0) {
			if (errno == ENOTSUP)
				return 0;
			ret = -errno;
			error("getting a xattr value failed for %s attr %s: %m",
			      file_name, cur_name);
			return ret;
		}

		/* On success lgetxattr() returned the value length. */
		ret = btrfs_insert_xattr_item(trans, root, cur_name,
					      cur_name_len, cur_value,
					      ret, objectid);
		if (ret) {
			errno = -ret;
			error("inserting a xattr item failed for %s: %m",
			      file_name);
			return ret;
		}
	}

	return 0;
}
|
|
|
|
static int add_symbolic_link(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
u64 objectid, const char *path_name)
|
|
{
|
|
int ret;
|
|
char buf[PATH_MAX];
|
|
|
|
ret = readlink(path_name, buf, sizeof(buf));
|
|
if (ret <= 0) {
|
|
error("readlink failed for %s: %m", path_name);
|
|
goto fail;
|
|
}
|
|
if (ret >= sizeof(buf)) {
|
|
error("symlink too long for %s", path_name);
|
|
ret = -1;
|
|
goto fail;
|
|
}
|
|
|
|
buf[ret] = '\0'; /* readlink does not do it for us */
|
|
ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
|
|
buf, ret + 1);
|
|
fail:
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Copy the contents of regular file @path_name into inode @objectid.
 *
 * - Empty files need no extent at all.
 * - Files that are smaller than one sector and no larger than
 *   BTRFS_MAX_INLINE_DATA_SIZE() are stored as a single inline extent.
 * - Everything else is copied sector by sector into reserved extents of at
 *   most 1MiB each; every sector is checksummed and written, and each extent
 *   is recorded through btrfs_record_file_extent() using @btrfs_inode.
 *
 * Returns 0 on success and a negative value on failure.  A failed read now
 * yields -errno; previously a failed read inside the extent loop left a
 * stale 0 in the return value and was reported as success.
 */
static int add_file_items(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  struct btrfs_inode_item *btrfs_inode, u64 objectid,
			  struct stat *st, const char *path_name)
{
	int ret = -1;
	ssize_t ret_read;
	u64 bytes_read = 0;
	struct btrfs_key key;
	u32 sectorsize = root->fs_info->sectorsize;
	u64 first_block = 0;
	u64 file_pos = 0;
	u64 cur_bytes;
	u64 total_bytes;
	struct extent_buffer *eb = NULL;
	int fd;

	if (st->st_size == 0)
		return 0;

	fd = open(path_name, O_RDONLY);
	if (fd == -1) {
		ret = -errno;
		error("cannot open %s: %m", path_name);
		return ret;
	}

	if (st->st_size <= BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info) &&
	    st->st_size < sectorsize) {
		/*
		 * Zero filled, so a short read (file shrunk after lstat) can
		 * never put uninitialized heap bytes into the image.
		 */
		char *buffer = calloc(1, st->st_size);

		if (!buffer) {
			ret = -ENOMEM;
			goto end;
		}

		ret_read = pread64(fd, buffer, st->st_size, bytes_read);
		if (ret_read == -1) {
			ret = -errno;
			error("cannot read %s at offset %llu length %llu: %m",
			      path_name, bytes_read,
			      (unsigned long long)st->st_size);
			free(buffer);
			goto end;
		}

		ret = btrfs_insert_inline_extent(trans, root, objectid, 0,
						 buffer, st->st_size);
		free(buffer);
		goto end;
	}

	/*
	 * Round st_size up to the FS block size.  Done in 64 bits so the
	 * sector count of a very large file cannot overflow an int.
	 */
	total_bytes = ((u64)st->st_size + sectorsize - 1) / sectorsize *
		      sectorsize;

	/*
	 * do our IO in extent buffers so it can work
	 * against any raid type
	 */
	eb = calloc(1, sizeof(*eb) + sectorsize);
	if (!eb) {
		ret = -ENOMEM;
		goto end;
	}

	while (total_bytes) {
		/*
		 * keep our extent size at 1MB max, this makes it easier to
		 * work inside the tiny block groups created during mkfs
		 */
		cur_bytes = min(total_bytes, (u64)SZ_1M);
		ret = btrfs_reserve_extent(trans, root, cur_bytes, 0, 0,
					   (u64)-1, &key, 1);
		if (ret)
			goto end;

		first_block = key.objectid;
		bytes_read = 0;

		while (bytes_read < cur_bytes) {
			/* The tail of the last sector must read as zeros. */
			memset(eb->data, 0, sectorsize);

			ret_read = pread64(fd, eb->data, sectorsize,
					   file_pos + bytes_read);
			if (ret_read == -1) {
				/* ret still holds 0 from the reservation. */
				ret = -errno;
				error("cannot read %s at offset %llu length %u: %m",
				      path_name, file_pos + bytes_read,
				      sectorsize);
				goto end;
			}

			eb->start = first_block + bytes_read;
			eb->len = sectorsize;
			eb->fs_info = root->fs_info;

			/*
			 * we're doing the csum before we record the extent,
			 * but that's ok
			 */
			ret = btrfs_csum_file_block(trans,
					first_block + bytes_read + sectorsize,
					first_block + bytes_read,
					eb->data, sectorsize);
			if (ret)
				goto end;

			ret = write_and_map_eb(root->fs_info, eb);
			if (ret) {
				error("failed to write %s", path_name);
				goto end;
			}

			bytes_read += sectorsize;
		}

		if (bytes_read) {
			ret = btrfs_record_file_extent(trans, root, objectid,
					btrfs_inode, file_pos, first_block,
					cur_bytes);
			if (ret)
				goto end;
		}

		file_pos += cur_bytes;
		total_bytes -= cur_bytes;
	}

end:
	free(eb);
	close(fd);
	return ret;
}
|
|
|
|
static int traverse_directory(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root, const char *dir_name,
|
|
struct directory_name_entry *dir_head)
|
|
{
|
|
int ret = 0;
|
|
|
|
struct btrfs_inode_item cur_inode;
|
|
struct btrfs_inode_item *inode_item;
|
|
int count, i, dir_index_cnt;
|
|
struct dirent **files;
|
|
struct stat st;
|
|
struct directory_name_entry *dir_entry, *parent_dir_entry;
|
|
struct dirent *cur_file;
|
|
ino_t parent_inum, cur_inum;
|
|
ino_t highest_inum = 0;
|
|
const char *parent_dir_name;
|
|
struct btrfs_path path;
|
|
struct extent_buffer *leaf;
|
|
struct btrfs_key root_dir_key;
|
|
u64 root_dir_inode_size = 0;
|
|
|
|
/* Add list for source directory */
|
|
dir_entry = malloc(sizeof(struct directory_name_entry));
|
|
if (!dir_entry)
|
|
return -ENOMEM;
|
|
dir_entry->dir_name = dir_name;
|
|
dir_entry->path = realpath(dir_name, NULL);
|
|
if (!dir_entry->path) {
|
|
error("realpath failed for %s: %m", dir_name);
|
|
ret = -1;
|
|
goto fail_no_dir;
|
|
}
|
|
|
|
parent_inum = highest_inum + BTRFS_FIRST_FREE_OBJECTID;
|
|
dir_entry->inum = parent_inum;
|
|
list_add_tail(&dir_entry->list, &dir_head->list);
|
|
|
|
btrfs_init_path(&path);
|
|
|
|
root_dir_key.objectid = btrfs_root_dirid(&root->root_item);
|
|
root_dir_key.offset = 0;
|
|
root_dir_key.type = BTRFS_INODE_ITEM_KEY;
|
|
ret = btrfs_lookup_inode(trans, root, &path, &root_dir_key, 1);
|
|
if (ret) {
|
|
error("failed to lookup root dir: %d", ret);
|
|
goto fail_no_dir;
|
|
}
|
|
|
|
leaf = path.nodes[0];
|
|
inode_item = btrfs_item_ptr(leaf, path.slots[0],
|
|
struct btrfs_inode_item);
|
|
|
|
root_dir_inode_size = calculate_dir_inode_size(dir_name);
|
|
btrfs_set_inode_size(leaf, inode_item, root_dir_inode_size);
|
|
btrfs_mark_buffer_dirty(leaf);
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
do {
|
|
parent_dir_entry = list_entry(dir_head->list.next,
|
|
struct directory_name_entry,
|
|
list);
|
|
list_del(&parent_dir_entry->list);
|
|
|
|
parent_inum = parent_dir_entry->inum;
|
|
parent_dir_name = parent_dir_entry->dir_name;
|
|
if (chdir(parent_dir_entry->path)) {
|
|
error("chdir failed for %s: %m",
|
|
parent_dir_name);
|
|
ret = -1;
|
|
goto fail_no_files;
|
|
}
|
|
|
|
count = scandir(parent_dir_entry->path, &files,
|
|
directory_select, NULL);
|
|
if (count == -1) {
|
|
error("scandir failed for %s: %m",
|
|
parent_dir_name);
|
|
ret = -1;
|
|
goto fail;
|
|
}
|
|
|
|
for (i = 0; i < count; i++) {
|
|
cur_file = files[i];
|
|
|
|
if (lstat(cur_file->d_name, &st) == -1) {
|
|
error("lstat failed for %s: %m",
|
|
cur_file->d_name);
|
|
ret = -1;
|
|
goto fail;
|
|
}
|
|
|
|
/*
|
|
* We can not directly use the source ino number,
|
|
* as there is a chance that the ino is smaller than
|
|
* BTRFS_FIRST_FREE_OBJECTID, which will screw up
|
|
* backref code.
|
|
*/
|
|
cur_inum = st.st_ino + BTRFS_FIRST_FREE_OBJECTID;
|
|
ret = add_directory_items(trans, root,
|
|
cur_inum, parent_inum,
|
|
cur_file->d_name,
|
|
&st, &dir_index_cnt);
|
|
if (ret) {
|
|
error("unable to add directory items for %s: %d",
|
|
cur_file->d_name, ret);
|
|
goto fail;
|
|
}
|
|
|
|
ret = add_inode_items(trans, root, &st,
|
|
cur_file->d_name, cur_inum,
|
|
&cur_inode);
|
|
if (ret == -EEXIST) {
|
|
if (st.st_nlink <= 1) {
|
|
error(
|
|
"item %s already exists but has wrong st_nlink %lu <= 1",
|
|
cur_file->d_name,
|
|
(unsigned long)st.st_nlink);
|
|
goto fail;
|
|
}
|
|
ret = 0;
|
|
continue;
|
|
}
|
|
if (ret) {
|
|
error("unable to add inode items for %s: %d",
|
|
cur_file->d_name, ret);
|
|
goto fail;
|
|
}
|
|
|
|
ret = add_xattr_item(trans, root,
|
|
cur_inum, cur_file->d_name);
|
|
if (ret) {
|
|
error("unable to add xattr items for %s: %d",
|
|
cur_file->d_name, ret);
|
|
if (ret != -ENOTSUP)
|
|
goto fail;
|
|
}
|
|
|
|
if (S_ISDIR(st.st_mode)) {
|
|
char tmp[PATH_MAX];
|
|
|
|
dir_entry = malloc(sizeof(*dir_entry));
|
|
if (!dir_entry) {
|
|
ret = -ENOMEM;
|
|
goto fail;
|
|
}
|
|
dir_entry->dir_name = cur_file->d_name;
|
|
if (path_cat_out(tmp, parent_dir_entry->path,
|
|
cur_file->d_name)) {
|
|
error("invalid path: %s/%s",
|
|
parent_dir_entry->path,
|
|
cur_file->d_name);
|
|
ret = -EINVAL;
|
|
goto fail;
|
|
}
|
|
dir_entry->path = strdup(tmp);
|
|
if (!dir_entry->path) {
|
|
error_msg(ERROR_MSG_MEMORY, NULL);
|
|
ret = -ENOMEM;
|
|
goto fail;
|
|
}
|
|
dir_entry->inum = cur_inum;
|
|
list_add_tail(&dir_entry->list,
|
|
&dir_head->list);
|
|
} else if (S_ISREG(st.st_mode)) {
|
|
ret = add_file_items(trans, root, &cur_inode,
|
|
cur_inum, &st,
|
|
cur_file->d_name);
|
|
if (ret) {
|
|
error("unable to add file items for %s: %d",
|
|
cur_file->d_name, ret);
|
|
goto fail;
|
|
}
|
|
} else if (S_ISLNK(st.st_mode)) {
|
|
ret = add_symbolic_link(trans, root,
|
|
cur_inum, cur_file->d_name);
|
|
if (ret) {
|
|
error("unable to add symlink for %s: %d",
|
|
cur_file->d_name, ret);
|
|
goto fail;
|
|
}
|
|
}
|
|
}
|
|
|
|
free_namelist(files, count);
|
|
free(parent_dir_entry->path);
|
|
free(parent_dir_entry);
|
|
|
|
index_cnt = 2;
|
|
|
|
} while (!list_empty(&dir_head->list));
|
|
|
|
out:
|
|
return !!ret;
|
|
fail:
|
|
free_namelist(files, count);
|
|
fail_no_files:
|
|
free(parent_dir_entry);
|
|
goto out;
|
|
fail_no_dir:
|
|
free(dir_entry);
|
|
goto out;
|
|
}
|
|
|
|
int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root,
|
|
bool verbose)
|
|
{
|
|
int ret;
|
|
struct btrfs_trans_handle *trans;
|
|
struct stat root_st;
|
|
struct directory_name_entry dir_head;
|
|
struct directory_name_entry *dir_entry = NULL;
|
|
|
|
ret = lstat(source_dir, &root_st);
|
|
if (ret) {
|
|
error("unable to lstat %s: %m", source_dir);
|
|
ret = -errno;
|
|
goto out;
|
|
}
|
|
|
|
INIT_LIST_HEAD(&dir_head.list);
|
|
|
|
trans = btrfs_start_transaction(root, 1);
|
|
if (IS_ERR(trans)) {
|
|
ret = PTR_ERR(trans);
|
|
errno = -ret;
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
|
goto fail;
|
|
}
|
|
|
|
ret = traverse_directory(trans, root, source_dir, &dir_head);
|
|
if (ret) {
|
|
error("unable to traverse directory %s: %d", source_dir, ret);
|
|
goto fail;
|
|
}
|
|
ret = btrfs_commit_transaction(trans, root);
|
|
if (ret) {
|
|
errno = -ret;
|
|
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
|
|
goto out;
|
|
}
|
|
|
|
if (verbose)
|
|
printf("Making image is completed.\n");
|
|
return 0;
|
|
fail:
|
|
/*
|
|
* Since we don't have btrfs_abort_transaction() yet, uncommitted trans
|
|
* will trigger a BUG_ON().
|
|
*
|
|
* However before mkfs is fully finished, the magic number is invalid,
|
|
* so even we commit transaction here, the fs still can't be mounted.
|
|
*
|
|
* To do a graceful error out, here we commit transaction as a
|
|
* workaround.
|
|
* Since we have already hit some problem, the return value doesn't
|
|
* matter now.
|
|
*/
|
|
btrfs_commit_transaction(trans, root);
|
|
while (!list_empty(&dir_head.list)) {
|
|
dir_entry = list_entry(dir_head.list.next,
|
|
struct directory_name_entry, list);
|
|
list_del(&dir_entry->list);
|
|
free(dir_entry->path);
|
|
free(dir_entry);
|
|
}
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
static int ftw_add_entry_size(const char *fpath, const struct stat *st,
|
|
int type, struct FTW *ftwbuf)
|
|
{
|
|
/*
|
|
* Failed to read the directory, mostly due to EPERM. Abort ASAP, so
|
|
* we don't need to populate the fs.
|
|
*/
|
|
if (type == FTW_DNR || type == FTW_NS)
|
|
return -EPERM;
|
|
|
|
if (S_ISREG(st->st_mode))
|
|
ftw_data_size += round_up(st->st_size, fs_block_size);
|
|
ftw_meta_nr_inode++;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Estimate the device size needed to hold the contents of @dir_name.
 *
 * @sectorsize:   block size used to round up file sizes
 * @min_dev_size: size that is always part of the result
 * @meta_profile: metadata block group profile, only DUP is looked at
 * @data_profile: data block group profile, only DUP is looked at
 *
 * The tree is walked with nftw() without following symlinks.  If the walk
 * fails the process exits with status 1.
 *
 * Returns @min_dev_size plus the estimated size of the additional metadata
 * and data chunks.  The chunk sizes are computed with integer arithmetic
 * only; multiplying byte counts through a float would lose precision once
 * the value needs more than 24 significant bits.
 */
u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
			u64 meta_profile, u64 data_profile)
{
	u64 total_size = 0;
	int ret;

	u64 meta_size = 0;	/* Based on @ftw_meta_nr_inode */
	u64 meta_chunk_size = 0;	/* Based on @meta_size */
	u64 data_chunk_size = 0;	/* Based on @ftw_data_size */

	u64 meta_threshold = SZ_8M;
	u64 data_threshold = SZ_8M;

	/* Number of copies stored per chunk: 2 for DUP, 1 otherwise. */
	u64 data_multiplier = 1;
	u64 meta_multiplier = 1;

	fs_block_size = sectorsize;
	ftw_data_size = 0;
	ftw_meta_nr_inode = 0;

	/*
	 * Symbolic link is not followed when creating files, so no need to
	 * follow them here.
	 */
	ret = nftw(dir_name, ftw_add_entry_size, 10, FTW_PHYS);
	if (ret < 0) {
		error("ftw subdir walk of %s failed: %m", dir_name);
		exit(1);
	}

	/*
	 * Maximum metadata usage for every inode, which will be PATH_MAX
	 * for the following items:
	 * 1) DIR_ITEM
	 * 2) DIR_INDEX
	 * 3) INODE_REF
	 *
	 * Plus possible inline extent size, which is sectorsize.
	 *
	 * And finally, allow metadata usage to increase with data size.
	 * Follow the old kernel 8:1 data:meta ratio.
	 * This is especially important for --rootdir, as the file extent size
	 * upper limit is 1M, instead of 128M in kernel.
	 * This can bump meta usage easily.
	 */
	meta_size = ftw_meta_nr_inode * (PATH_MAX * 3 + sectorsize) +
		    ftw_data_size / 8;

	/* Minimal chunk size from btrfs_alloc_chunk(). */
	if (meta_profile & BTRFS_BLOCK_GROUP_DUP) {
		meta_threshold = SZ_32M;
		meta_multiplier = 2;
	}
	if (data_profile & BTRFS_BLOCK_GROUP_DUP) {
		data_threshold = SZ_64M;
		data_multiplier = 2;
	}

	/*
	 * Only when the usage is larger than the minimal chunk size (threshold)
	 * we need to allocate new chunk, or the initial chunk in the image is
	 * large enough.
	 */
	if (meta_size > meta_threshold)
		meta_chunk_size = (round_up(meta_size, meta_threshold) -
				   meta_threshold) * meta_multiplier;
	if (ftw_data_size > data_threshold)
		data_chunk_size = (round_up(ftw_data_size, data_threshold) -
				   data_threshold) * data_multiplier;

	total_size = data_chunk_size + meta_chunk_size + min_dev_size;
	return total_size;
}
|
|
|
|
/*
|
|
* Get the end position of the last device extent for given @devid;
|
|
* @size_ret is exclusive (means it should be aligned to sectorsize)
|
|
*/
|
|
static int get_device_extent_end(struct btrfs_fs_info *fs_info,
|
|
u64 devid, u64 *size_ret)
|
|
{
|
|
struct btrfs_root *dev_root = fs_info->dev_root;
|
|
struct btrfs_key key;
|
|
struct btrfs_path path;
|
|
struct btrfs_dev_extent *de;
|
|
int ret;
|
|
|
|
key.objectid = devid;
|
|
key.type = BTRFS_DEV_EXTENT_KEY;
|
|
key.offset = (u64)-1;
|
|
|
|
btrfs_init_path(&path);
|
|
ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
|
|
if (ret == 0) {
|
|
error("DEV_EXTENT for devid %llu not found", devid);
|
|
ret = -EUCLEAN;
|
|
goto out;
|
|
}
|
|
|
|
ret = btrfs_previous_item(dev_root, &path, devid, BTRFS_DEV_EXTENT_KEY);
|
|
if (ret < 0)
|
|
goto out;
|
|
|
|
/* No dev_extent at all, not really possible for rootdir case */
|
|
if (ret > 0) {
|
|
*size_ret = 0;
|
|
ret = -EUCLEAN;
|
|
goto out;
|
|
}
|
|
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
|
|
de = btrfs_item_ptr(path.nodes[0], path.slots[0],
|
|
struct btrfs_dev_extent);
|
|
*size_ret = key.offset + btrfs_dev_extent_length(path.nodes[0], de);
|
|
out:
|
|
btrfs_release_path(&path);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Set device size to @new_size.
|
|
*
|
|
* Only used for --rootdir option.
|
|
* We will need to reset the following values:
|
|
* 1) dev item in chunk tree
|
|
* 2) super->dev_item
|
|
* 3) super->total_bytes
|
|
*/
|
|
static int set_device_size(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_device *device, u64 new_size)
|
|
{
|
|
struct btrfs_root *chunk_root = fs_info->chunk_root;
|
|
struct btrfs_trans_handle *trans;
|
|
struct btrfs_dev_item *di;
|
|
struct btrfs_path path;
|
|
struct btrfs_key key;
|
|
int ret;
|
|
|
|
/*
|
|
* Update in-memory device->total_bytes, so that at trans commit time,
|
|
* super->dev_item will also get updated
|
|
*/
|
|
device->total_bytes = new_size;
|
|
btrfs_init_path(&path);
|
|
|
|
/* Update device item in chunk tree */
|
|
trans = btrfs_start_transaction(chunk_root, 1);
|
|
if (IS_ERR(trans)) {
|
|
ret = PTR_ERR(trans);
|
|
errno = -ret;
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
|
return ret;
|
|
}
|
|
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
|
|
key.type = BTRFS_DEV_ITEM_KEY;
|
|
key.offset = device->devid;
|
|
|
|
ret = btrfs_search_slot(trans, chunk_root, &key, &path, 0, 1);
|
|
if (ret < 0)
|
|
goto err;
|
|
if (ret > 0)
|
|
ret = -ENOENT;
|
|
di = btrfs_item_ptr(path.nodes[0], path.slots[0],
|
|
struct btrfs_dev_item);
|
|
btrfs_set_device_total_bytes(path.nodes[0], di, new_size);
|
|
btrfs_mark_buffer_dirty(path.nodes[0]);
|
|
|
|
/*
|
|
* Update super->total_bytes, since it's only used for --rootdir,
|
|
* there is only one device, just use the @new_size.
|
|
*/
|
|
btrfs_set_super_total_bytes(fs_info->super_copy, new_size);
|
|
|
|
/*
|
|
* Commit transaction to reflect the updated super->total_bytes and
|
|
* super->dev_item
|
|
*/
|
|
ret = btrfs_commit_transaction(trans, chunk_root);
|
|
if (ret < 0) {
|
|
errno = -ret;
|
|
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
|
|
}
|
|
btrfs_release_path(&path);
|
|
return ret;
|
|
|
|
err:
|
|
btrfs_release_path(&path);
|
|
/*
|
|
* Committing the transaction here won't cause problems since the fs
|
|
* still has an invalid magic number, and something wrong already
|
|
* happened, we don't care the return value anyway.
|
|
*/
|
|
btrfs_commit_transaction(trans, chunk_root);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Shrink a single device filesystem to the end of its last device extent.
 *
 * @new_size_ret:     if not NULL, receives the new size once the device size
 *                    has been updated
 * @shrink_file_size: also truncate the backing file to the new size; this is
 *                    silently skipped if the device is not a regular file
 *
 * Returns -ENOTTY if the filesystem has more than one device, -EUCLEAN if
 * the computed size is not aligned to the sector size, the error of
 * get_device_extent_end() or set_device_size(), or the (negative) return
 * value of fstat64()/ftruncate64().  Otherwise the result of
 * set_device_size() or ftruncate64() is returned.
 */
int btrfs_mkfs_shrink_fs(struct btrfs_fs_info *fs_info, u64 *new_size_ret,
			 bool shrink_file_size)
{
	struct btrfs_device *device;
	struct list_head *pos;
	struct stat64 file_stat;
	u64 new_size;
	int nr_devs = 0;
	int ret;

	list_for_each(pos, &fs_info->fs_devices->devices)
		nr_devs++;
	if (nr_devs > 1) {
		error("cannot shrink fs with more than 1 device");
		return -ENOTTY;
	}

	/* The only device is looked up as devid 1. */
	ret = get_device_extent_end(fs_info, 1, &new_size);
	if (ret < 0) {
		errno = -ret;
		error("failed to get minimal device size: %d (%m)", ret);
		return ret;
	}
	if (!IS_ALIGNED(new_size, fs_info->sectorsize)) {
		error("shrunk filesystem size %llu not aligned to %u",
		      new_size, fs_info->sectorsize);
		return -EUCLEAN;
	}

	device = list_entry(fs_info->fs_devices->devices.next,
			    struct btrfs_device, dev_list);
	ret = set_device_size(fs_info, device, new_size);
	if (ret < 0)
		return ret;
	if (new_size_ret)
		*new_size_ret = new_size;

	if (!shrink_file_size)
		return ret;

	ret = fstat64(device->fd, &file_stat);
	if (ret < 0) {
		error("failed to stat devid %llu: %m", device->devid);
		return ret;
	}
	/* Only a regular file can be truncated. */
	if (!S_ISREG(file_stat.st_mode))
		return ret;

	ret = ftruncate64(device->fd, new_size);
	if (ret < 0)
		error("failed to truncate device file of devid %llu: %m",
		      device->devid);

	return ret;
}
|