#!/usr/bin/env python2
#
# Simple python program to print out all the extents of a single file
# LGPLv2 license
# Copyright Facebook 2014
|
|
|
|
import sys,os,struct,fcntl,ctypes,stat,argparse
|
|
|
|
# Helpers for max ints: the largest values that fit in unsigned 64-bit and
# 32-bit fields.  They are used as the "no upper bound" defaults of a search
# key.  Plain int literals are used instead of the Python-2-only "1L" form;
# Python promotes to arbitrary precision automatically, so the values are
# unchanged.
maxu64 = (1 << 64) - 1
maxu32 = (1 << 32) - 1
|
|
|
|
# Item key types.  These are the values compared against the "type" field
# of each search result header.

# the inode item (like the information returned by stat)
BTRFS_INODE_ITEM_KEY = 1
# backref to the directory
BTRFS_INODE_REF_KEY = 12
# backref to the directory, v2
BTRFS_INODE_EXTREF_KEY = 13
# xattr items
BTRFS_XATTR_ITEM_KEY = 24
# orphan items
BTRFS_ORPHAN_ITEM_KEY = 48

# treelog items for dirs
BTRFS_DIR_LOG_ITEM_KEY = 60
BTRFS_DIR_LOG_INDEX_KEY = 72

# dir items and dir indexes both hold filenames
BTRFS_DIR_ITEM_KEY = 84
BTRFS_DIR_INDEX_KEY = 96

# these are the file extent pointers
BTRFS_EXTENT_DATA_KEY = 108
# checksums
BTRFS_EXTENT_CSUM_KEY = 128

# root item for subvolumes and snapshots
BTRFS_ROOT_ITEM_KEY = 132
# root item backrefs
BTRFS_ROOT_BACKREF_KEY = 144
BTRFS_ROOT_REF_KEY = 156

# each allocated extent has an extent item
BTRFS_EXTENT_ITEM_KEY = 168
# optimized extents for metadata only
BTRFS_METADATA_ITEM_KEY = 169

# backrefs for extents
BTRFS_TREE_BLOCK_REF_KEY = 176
BTRFS_EXTENT_DATA_REF_KEY = 178
BTRFS_EXTENT_REF_V0_KEY = 180
BTRFS_SHARED_BLOCK_REF_KEY = 182
BTRFS_SHARED_DATA_REF_KEY = 184

# one of these for each block group
BTRFS_BLOCK_GROUP_ITEM_KEY = 192

# dev extents record which part of each device is allocated
BTRFS_DEV_EXTENT_KEY = 204
# dev items describe devices
BTRFS_DEV_ITEM_KEY = 216
# one for each chunk
BTRFS_CHUNK_ITEM_KEY = 228

# qgroup info
BTRFS_QGROUP_STATUS_KEY = 240
BTRFS_QGROUP_INFO_KEY = 242
BTRFS_QGROUP_LIMIT_KEY = 244
BTRFS_QGROUP_RELATION_KEY = 246

# records balance progress
BTRFS_BALANCE_ITEM_KEY = 248

# stats on device errors
BTRFS_DEV_STATS_KEY = 249
BTRFS_DEV_REPLACE_KEY = 250
BTRFS_STRING_ITEM_KEY = 253
|
|
|
|
# id of the extent tree, which stores information about which extents are
# in use, and their reference counts
BTRFS_EXTENT_TREE_OBJECTID = 2

# bit in a block group item's flags that is tested to pick out data block
# groups
BTRFS_BLOCK_GROUP_DATA = 1 << 0
|
|
|
|
# In the kernel sources this is btrfs_ioctl_search_args_v2, flattened: it
# holds both the btrfs_ioctl_search_key fields and the result buffer.
# We're using a 64K buffer size.
args_buffer_size = 65536


class btrfs_ioctl_search_args(ctypes.Structure):
    """Argument block handed to the search ioctl: key, buffer size, buffer."""

    _pack_ = 1
    _fields_ = [
        # --- search key ---
        ("tree_id", ctypes.c_ulonglong),
        ("min_objectid", ctypes.c_ulonglong),
        ("max_objectid", ctypes.c_ulonglong),
        ("min_offset", ctypes.c_ulonglong),
        ("max_offset", ctypes.c_ulonglong),
        ("min_transid", ctypes.c_ulonglong),
        ("max_transid", ctypes.c_ulonglong),
        ("min_type", ctypes.c_uint),
        ("max_type", ctypes.c_uint),
        # set before each search; read back afterwards as the number of
        # items placed in buf
        ("nr_items", ctypes.c_uint),
        ("unused", ctypes.c_uint),
        ("unused1", ctypes.c_ulonglong),
        ("unused2", ctypes.c_ulonglong),
        ("unused3", ctypes.c_ulonglong),
        ("unused4", ctypes.c_ulonglong),
        # --- result buffer ---
        ("buf_size", ctypes.c_ulonglong),
        ("buf", ctypes.c_ubyte * args_buffer_size),
    ]
|
|
|
|
# the search ioctl returns one of these headers in front of each item it
# places in the results buffer
class btrfs_ioctl_search_header(ctypes.Structure):
    """Per-item header found in the search results buffer."""

    _pack_ = 1
    _fields_ = [
        ("transid", ctypes.c_ulonglong),
        # (objectid, type, offset) identify the item
        ("objectid", ctypes.c_ulonglong),
        ("offset", ctypes.c_ulonglong),
        ("type", ctypes.c_uint),
        # number of bytes of item data that follow this header
        ("len", ctypes.c_uint),
    ]
|
|
|
|
# values of the type field in btrfs_file_extent_item
BTRFS_FILE_EXTENT_INLINE = 0
BTRFS_FILE_EXTENT_REG = 1
BTRFS_FILE_EXTENT_PREALLOC = 2
|
|
|
|
class btrfs_file_extent_item(ctypes.LittleEndianStructure):
    """Packed little-endian layout of a file extent item.

    The fields up to and including "type" occupy the first 21 bytes; the
    remaining four 64-bit fields follow.
    """

    _pack_ = 1
    _fields_ = [
        ("generation", ctypes.c_ulonglong),
        ("ram_bytes", ctypes.c_ulonglong),
        ("compression", ctypes.c_ubyte),
        ("encryption", ctypes.c_ubyte),
        ("other_encoding", ctypes.c_ubyte * 2),
        # one of the BTRFS_FILE_EXTENT_* values
        ("type", ctypes.c_ubyte),
        ("disk_bytenr", ctypes.c_ulonglong),
        ("disk_num_bytes", ctypes.c_ulonglong),
        ("offset", ctypes.c_ulonglong),
        ("num_bytes", ctypes.c_ulonglong),
    ]
|
|
|
|
class btrfs_block_group_item(ctypes.LittleEndianStructure):
    """Packed little-endian layout of a block group item."""

    _pack_ = 1
    _fields_ = [
        ("used", ctypes.c_ulonglong),
        ("chunk_objectid", ctypes.c_ulonglong),
        # bit mask; tested against BTRFS_BLOCK_GROUP_DATA by the caller
        ("flags", ctypes.c_ulonglong),
    ]
|
|
|
|
class btrfs_ioctl_search(object):
    """A search key plus results buffer, and the ioctl call that fills it.

    A new instance starts with the widest possible key: tree_id 0, every
    min_* field 0 and every max_* field all-ones.  Callers narrow the
    fields of ``self.args`` before calling search().
    """

    # Request number of the v2 search ioctl.  This is magic encoded for
    # x86_64.  It is the same for every instance, so it lives on the class;
    # ``self.ioctl_num`` keeps working.  Written as a plain int rather than
    # with the Python-2-only "L" suffix.
    ioctl_num = 3228603409

    def __init__(self):
        self.args = btrfs_ioctl_search_args()
        self.args.tree_id = 0
        self.args.min_objectid = 0
        self.args.max_objectid = maxu64
        self.args.min_offset = 0
        self.args.max_offset = maxu64
        self.args.min_transid = 0
        self.args.max_transid = maxu64
        self.args.min_type = 0
        self.args.max_type = maxu32
        self.args.nr_items = 0
        self.args.buf_size = args_buffer_size

    def search(self, fd, nritems=65536):
        """Run the search ioctl on fd; the results get stored into args.buf.

        nritems is written to args.nr_items before the call.  Callers read
        args.nr_items afterwards as the number of items returned.  Errors
        from fcntl.ioctl propagate to the caller.
        """
        self.args.nr_items = nritems
        # mutate_flag=1: the structure is passed by reference so the
        # results written during the call are visible in self.args
        fcntl.ioctl(fd, self.ioctl_num, self.args, 1)
|
|
|
|
# this moves the search key forward by one.  If the end result is
# still a valid search key (all mins less than or equal to all maxes),
# we return True.  Otherwise False
#
def advance_search(search):
    """Step the minimum key of ``search.args`` to the next possible key.

    btrfs keys sort as (objectid, type, offset), so the offset is bumped
    first.  When a field is already at its maximum the increment carries
    into the next more significant field and the less significant fields
    restart at 0.  Without that reset the "next" key would jump past
    every item of the new type/objectid with a smaller offset.

    Returns False when the key cannot be advanced any further, or when a
    min_* field ends up above its max_* counterpart; True otherwise.
    """
    key = search.args

    if key.min_offset < maxu64:
        key.min_offset += 1
    elif key.min_type < 255:
        # offset saturated: carry into the type (types are 8 bits wide)
        key.min_type += 1
        key.min_offset = 0
    elif key.min_objectid < maxu64:
        # offset and type saturated: carry into the objectid
        key.min_objectid += 1
        key.min_type = 0
        key.min_offset = 0
    else:
        # already at the largest possible key
        return False

    return (key.min_offset <= key.max_offset and
            key.min_type <= key.max_type and
            key.min_objectid <= key.max_objectid)
|
|
|
|
# given one search_header and one file_item, print the details.  This
# also tosses the [disk_bytenr,disk_num_bytes] into extent_hash to record
# which extents were used by this file
#
def print_one_extent(header, fi, extent_hash):
    """Print one file extent and record its on-disk size in extent_hash.

    header      -- search header of the item (objectid, offset, len are read)
    fi          -- the file extent item that followed the header
    extent_hash -- dict updated in place: disk_bytenr -> disk_num_bytes for
                   extents with a non-zero disk_bytenr, and the key -1 ->
                   inline data length for an inline extent

    Extents with disk_bytenr 0 are printed as holes and not recorded.
    The print calls use the single-argument parenthesized form, which
    behaves the same as a statement on Python 2 and as a function on
    Python 3.
    """
    if fi.type == BTRFS_FILE_EXTENT_INLINE:
        # header.len is the length of the item returned.  We subtract
        # the part of the file item header that is actually used (21 bytes)
        # and we get the length of the inlined data.
        # this may or may not be compressed
        inline_len = header.len - 21
        ram_bytes = fi.ram_bytes if fi.compression else inline_len
        print("(%Lu %Lu): ram %Lu disk 0 disk_size %Lu -- inline" %
              (header.objectid, header.offset, ram_bytes, inline_len))
        # inline data has no disk_bytenr, so it is filed under -1
        extent_hash[-1] = inline_len
        return

    tag = "" if fi.disk_bytenr else " -- hole"
    print("(%Lu %Lu): ram %Lu disk %Lu disk_size %Lu%s" %
          (header.objectid, header.offset, fi.num_bytes, fi.disk_bytenr,
           fi.disk_num_bytes, tag))

    if fi.disk_bytenr:
        extent_hash[fi.disk_bytenr] = fi.disk_num_bytes
|
|
|
|
# walk every EXTENT_DATA item of the file open on 'fd', printing each one
# followed by a summary line
def _print_extents_of_fd(fd, filename):
    """Do the work of print_file_extents() on an already open descriptor.

    Returns 0 on success or when fd is not a regular file, -1 when fstat
    or the search ioctl fails.  The descriptor is not closed here.
    """
    try:
        st = os.fstat(fd)
    except (IOError, OSError) as e:
        sys.stderr.write("Failed to open %s (%s)\n" % (filename, e))
        return -1

    if not stat.S_ISREG(st.st_mode):
        sys.stderr.write("%s not a regular file\n" % filename)
        return 0

    # restrict the search to the extent items of this one inode
    s = btrfs_ioctl_search()
    s.args.min_type = BTRFS_EXTENT_DATA_KEY
    s.args.max_type = BTRFS_EXTENT_DATA_KEY
    s.args.min_objectid = st.st_ino
    s.args.max_objectid = st.st_ino

    extent_hash = {}
    header = btrfs_ioctl_search_header()
    header_size = ctypes.sizeof(header)
    h = ctypes.addressof(header)
    fi_size = ctypes.sizeof(btrfs_file_extent_item)

    while True:
        try:
            s.search(fd)
        except (IOError, OSError) as e:
            sys.stderr.write("Search ioctl failed for %s (%s)\n" % (filename, e))
            return -1

        if s.args.nr_items == 0:
            break

        # p is the results buffer from the kernel, p_left the bytes of it
        # that have not been consumed yet
        p = ctypes.addressof(s.args.buf)
        p_left = args_buffer_size

        for _ in range(s.args.nr_items):
            # for each item, copy the header from the buffer into
            # our header struct.
            ctypes.memmove(h, p, header_size)
            p += header_size
            p_left -= header_size

            # this would be a kernel bug it shouldn't be sending malformed
            # items
            if p_left <= 0:
                break

            if header.type == BTRFS_EXTENT_DATA_KEY:
                fi = btrfs_file_extent_item()

                # An inline extent with little data is shorter than the
                # full struct, so only copy the bytes that belong to this
                # item.  The fields past them stay zero and are not used
                # for inline extents.
                copy_len = min(header.len, fi_size)

                # this would also be a kernel bug
                if p_left < copy_len:
                    break

                # Copy the file item out of the results buffer
                ctypes.memmove(ctypes.addressof(fi), p, copy_len)
                print_one_extent(header, fi, extent_hash)

            p += header.len
            p_left -= header.len
            if p_left <= 0:
                break

        # resume the next search just past the last item we looked at
        s.args.min_offset = header.offset
        if not advance_search(s):
            break

    total_extents = len(extent_hash)
    total_on_disk = sum(extent_hash.values())

    # don't divide by zero
    if total_on_disk == 0:
        total_on_disk = 1

    print("file: %s extents %Lu disk size %Lu logical size %Lu ratio %.2f" %
          (filename, total_extents, total_on_disk, st.st_size,
           float(st.st_size) / float(total_on_disk)))
    return 0


# open 'filename' and run the search ioctl against it, printing all the
# extents we find
def print_file_extents(filename):
    """Print every extent of the regular file ``filename`` plus a summary.

    Returns 0 on success, and also 0 (after a note on stderr) when
    filename is not a regular file.  Returns -1 when the file cannot be
    opened or the search ioctl fails.  The file descriptor is always
    closed before returning.
    """
    try:
        fd = os.open(filename, os.O_RDONLY)
    except (IOError, OSError) as e:
        sys.stderr.write("Failed to open %s (%s)\n" % (filename, e))
        return -1

    try:
        return _print_extents_of_fd(fd, filename)
    finally:
        os.close(fd)
|
|
|
|
# walk the block group items of the filesystem open on 'fd', printing the
# data block groups followed by a summary
def _print_block_groups_of_fd(fd, mountpoint):
    """Do the work of print_block_groups() on an already open descriptor.

    Returns 0 on success, -1 when the search ioctl fails.  The descriptor
    is not closed here.
    """
    s = btrfs_ioctl_search()
    s.args.min_type = BTRFS_BLOCK_GROUP_ITEM_KEY
    s.args.max_type = BTRFS_BLOCK_GROUP_ITEM_KEY
    s.args.tree_id = BTRFS_EXTENT_TREE_OBJECTID

    # running totals over all data block groups, and the details of the
    # one with the fewest used bytes (the last such one on ties)
    min_used = maxu64
    free_of_min_used = 0
    bg_of_min_used = 0
    total_free = 0

    header = btrfs_ioctl_search_header()
    header_size = ctypes.sizeof(header)
    h = ctypes.addressof(header)
    bg_size = ctypes.sizeof(btrfs_block_group_item)

    while True:
        try:
            s.search(fd)
        except (IOError, OSError) as e:
            sys.stderr.write("Search ioctl failed for %s (%s)\n" % (mountpoint, e))
            return -1

        if s.args.nr_items == 0:
            break

        # p is the results buffer from kernel, p_left the bytes of it
        # that have not been consumed yet
        p = ctypes.addressof(s.args.buf)
        p_left = args_buffer_size

        for _ in range(s.args.nr_items):
            # for each item, copy the header from the buffer into
            # our header struct
            ctypes.memmove(h, p, header_size)
            p += header_size
            p_left -= header_size

            # this would be a kernel bug it shouldn't be sending malformed
            # items
            if p_left <= 0:
                break

            if header.type == BTRFS_BLOCK_GROUP_ITEM_KEY:
                bg = btrfs_block_group_item()

                # this would be a kernel bug
                if p_left < bg_size:
                    break

                ctypes.memmove(ctypes.addressof(bg), p, bg_size)
                if bg.flags & BTRFS_BLOCK_GROUP_DATA:
                    # header.offset is printed as the group's length;
                    # guard the ratio against a zero length
                    if header.offset:
                        usage = float(bg.used) / float(header.offset)
                    else:
                        usage = 0.0
                    print("block group offset %Lu len %Lu used %Lu chunk_objectid %Lu flags %Lu usage %.2f" %
                          (header.objectid, header.offset, bg.used,
                           bg.chunk_objectid, bg.flags, usage))

                    total_free += (header.offset - bg.used)
                    if min_used >= bg.used:
                        min_used = bg.used
                        free_of_min_used = (header.offset - bg.used)
                        bg_of_min_used = header.objectid

            p += header.len
            p_left -= header.len
            if p_left <= 0:
                break

        # resume the next search at the objectid after the last item seen
        s.args.min_objectid = header.objectid
        if s.args.min_objectid < maxu64:
            s.args.min_objectid += 1
        else:
            # cannot move forward any more; repeating the same search
            # would loop forever
            break
        if s.args.min_objectid > s.args.max_objectid:
            break

    print("total_free %Lu min_used %Lu free_of_min_used %Lu block_group_of_min_used %Lu" %
          (total_free, min_used, free_of_min_used, bg_of_min_used))
    # the least used group's data fits into the free space of the others
    if (total_free - free_of_min_used) >= min_used:
        print("balance block group (%Lu) can reduce the number of data block group" % bg_of_min_used)

    return 0


def print_block_groups(mountpoint):
    """Print the data block groups of the filesystem at ``mountpoint``.

    Each data block group is printed with its usage, followed by a summary
    line and, when it applies, a hint about which block group to balance.

    Returns 0 on success, -1 when mountpoint cannot be opened or the
    search ioctl fails.  The file descriptor is always closed before
    returning.
    """
    try:
        fd = os.open(mountpoint, os.O_RDONLY)
    except (IOError, OSError) as e:
        sys.stderr.write("Failed to open %s (%s)\n" % (mountpoint, e))
        return -1

    try:
        return _print_block_groups_of_fd(fd, mountpoint)
    finally:
        os.close(fd)
|
|
|
|
# main
def main(argv=None):
    """Parse the command line and run the requested report on every path.

    argv -- argument list without the program name; None means sys.argv[1:]

    -b/--block-group takes precedence over -f/--file when both are given.
    Giving neither is reported as a usage error instead of silently doing
    nothing.

    Returns 0 when every path was processed successfully, 1 when at least
    one of them failed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('path', nargs='+')
    parser.add_argument('-b', '--block-group', action='store_const', const=1,
                        help='get block group information, use mountpoint as "path"')
    parser.add_argument('-f', '--file', action='store_const', const=1,
                        help='get file mapping, use filepath')

    args = parser.parse_args(argv)

    if args.block_group:
        handler = print_block_groups
    elif args.file:
        handler = print_file_extents
    else:
        # parser.error() prints the usage message and exits with status 2
        parser.error('one of -b/--block-group or -f/--file is required')

    status = 0
    for path in args.path:
        # keep going after a failure so the remaining paths are still shown
        if handler(path) != 0:
            status = 1
    return status


if __name__ == "__main__":
    sys.exit(main())
|