btrfs-progs: fix stray fd close in open_ctree_fs_info()
[BUG]
Although commit b2a1be83b8
("btrfs-progs: mkfs: keep file descriptors
open during whole time") is making sure we're only closing the writeable
fds after the fs is properly created, there is still a missing fd not
following the requirement.
And this explains the issue why sometimes after mkfs.btrfs, lsblk still
doesn't give a valid uuid.
Shown by the strace output (the command is "mkfs.btrfs -f
/dev/test/scratch1"):
openat(AT_FDCWD, "/dev/test/scratch1", O_RDWR) = 5 <<< Writeable open
fadvise64(5, 0, 0, POSIX_FADV_DONTNEED) = 0
sysinfo({uptime=2529, loads=[8704, 6272, 2496], totalram=4104548352, freeram=3376611328, sharedram=9211904, bufferram=43016192, totalswap=3221221376, freeswap=3221221376, procs=190, totalhigh=0, freehigh=0, mem_unit=1}) = 0
lseek(5, 0, SEEK_END) = 10737418240
lseek(5, 0, SEEK_SET) = 0
......
close(5) = 0 <<< Closed now
pwrite64(6, "O\250\22\261\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1163264) = 16384
pwrite64(6, "\201\316\272\342\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1179648) = 16384
pwrite64(6, "K}S\t\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1196032) = 16384
pwrite64(6, "\207j$\265\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 1212416) = 16384
pwrite64(6, "q\267;\336\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 5242880) = 16384
fsync(6) <<< But we're still writing into the disk.
[CAUSE]
After more digging, it turns out we have a very obvious escape in
open_ctree_fs_info():
open_ctree_fs_info()
|- fp = open(oca->filename, flags);
|- info = __open_ctree_fd();
|- close(fp);
As later we only do IO using the device fd, this close() seems fine.
But the truth is, for mkfs usage, this fs_info is a temporary one, with
a special magic number for the disk. And since mkfs is doing writeable
operations, this close() would immediately trigger udev scan.
And since at this stage, the fs is not yet fully created, udev can race
with mkfs, and may get the invalid temporary superblock.
[FIX]
Introduce a new btrfs_fs_info member, initial_fd, for
open_ctree_fs_info() to record the fd.
And on close_ctree(), if we find fs_info::initial_fd is a valid fd, then
close it.
By this, we make sure all writeable fds are only closed after we have
written valid super blocks into the disk.
Issue: #734
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
ace7d241cc
commit
e54514aaea
|
@ -404,6 +404,15 @@ struct btrfs_fs_info {
|
|||
u32 sectorsize;
|
||||
u32 stripesize;
|
||||
u32 leaf_data_size;
|
||||
|
||||
/*
|
||||
* For open_ctree_fs_info() to hold the initial fd until close.
|
||||
*
|
||||
* For writeable open_ctree_fs_info() call, we should not close
|
||||
* the fd until the fs_info is properly closed, or it will trigger
|
||||
* udev scan while our fs is not properly initialized.
|
||||
*/
|
||||
int initial_fd;
|
||||
u16 csum_type;
|
||||
u16 csum_size;
|
||||
|
||||
|
|
|
@ -913,6 +913,7 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr)
|
|||
fs_info->metadata_alloc_profile = (u64)-1;
|
||||
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
|
||||
fs_info->nr_global_roots = 1;
|
||||
fs_info->initial_fd = -1;
|
||||
|
||||
return fs_info;
|
||||
|
||||
|
@ -1690,7 +1691,10 @@ struct btrfs_fs_info *open_ctree_fs_info(struct open_ctree_args *oca)
|
|||
return NULL;
|
||||
}
|
||||
info = __open_ctree_fd(fp, oca);
|
||||
close(fp);
|
||||
if (info)
|
||||
info->initial_fd = fp;
|
||||
else
|
||||
close(fp);
|
||||
return info;
|
||||
}
|
||||
|
||||
|
@ -2297,6 +2301,8 @@ skip_commit:
|
|||
|
||||
btrfs_release_all_roots(fs_info);
|
||||
ret = btrfs_close_devices(fs_info->fs_devices);
|
||||
if (fs_info->initial_fd >= 0)
|
||||
close(fs_info->initial_fd);
|
||||
btrfs_cleanup_all_caches(fs_info);
|
||||
btrfs_free_fs_info(fs_info);
|
||||
if (!err)
|
||||
|
|
Loading…
Reference in New Issue