Merge pull request #7349 from yuyuyu101/add-pwritev

osd: FileStore: use pwritev instead of lseek+writev

Reviewed-by: Kefu Chai <kchai@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2016-02-09 08:58:28 -05:00
commit a20134d94a
7 changed files with 93 additions and 18 deletions

View File

@ -45,6 +45,7 @@ CHECK_FUNCTION_EXISTS(posix_fallocate HAVE_POSIX_FALLOCATE)
CHECK_FUNCTION_EXISTS(syncfs HAVE_SYS_SYNCFS)
CHECK_FUNCTION_EXISTS(sync_file_range HAVE_SYNC_FILE_RANGE)
CHECK_FUNCTION_EXISTS(mallinfo HAVE_MALLINFO)
CHECK_FUNCTION_EXISTS(pwritev HAVE_PWRITEV)
CHECK_INCLUDE_FILES("arpa/inet.h" HAVE_ARPA_INET_H)
CHECK_INCLUDE_FILES("boost/random/discrete_distribution.hpp" HAVE_BOOST_RANDOM_DISCRETE_DISTRIBUTION)
CHECK_INCLUDE_FILES("dirent.h" HAVE_DIRENT_H)

View File

@ -1073,6 +1073,7 @@ AC_CHECK_HEADERS([sys/prctl.h])
AC_CHECK_FUNCS([prctl])
AC_CHECK_FUNCS([pipe2])
AC_CHECK_FUNCS([posix_fadvise])
AC_CHECK_FUNCS([pwritev], AC_DEFINE([HAVE_PWRITEV], [1], [we have pwritev]))
AC_MSG_CHECKING([for fdatasync])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[

View File

@ -1974,6 +1974,46 @@ int buffer::list::write_file(const char *fn, int mode)
return 0;
}
static int do_writev(int fd, struct iovec *vec, uint64_t offset, unsigned veclen, unsigned bytes)
{
ssize_t r = 0;
while (bytes > 0) {
#ifdef HAVE_PWRITEV
r = ::pwritev(fd, vec, veclen, offset);
#else
r = ::lseek64(fd, offset, SEEK_SET);
if (r != offset) {
r = -errno;
return r;
}
r = ::writev(fd, vec, veclen);
#endif
if (r < 0) {
if (errno == EINTR)
continue;
return -errno;
}
bytes -= r;
offset += r;
if (bytes == 0) break;
while (r > 0) {
if (vec[0].iov_len <= (size_t)r) {
// drain this whole item
r -= vec[0].iov_len;
++vec;
--veclen;
} else {
vec[0].iov_base = (char *)vec[0].iov_base + r;
vec[0].iov_len -= r;
break;
}
}
}
return 0;
}
int buffer::list::write_fd(int fd) const
{
if (can_zero_copy())
@ -2029,6 +2069,34 @@ int buffer::list::write_fd(int fd) const
return 0;
}
int buffer::list::write_fd(int fd, uint64_t offset) const
{
iovec iov[IOV_MAX];
std::list<ptr>::const_iterator p = _buffers.begin();
uint64_t left_pbrs = _buffers.size();
while (left_pbrs) {
ssize_t bytes = 0;
unsigned iovlen = 0;
uint64_t size = MIN(left_pbrs, IOV_MAX);
left_pbrs -= size;
while (size > 0) {
iov[iovlen].iov_base = (void *)p->c_str();
iov[iovlen].iov_len = p->length();
iovlen++;
bytes += p->length();
++p;
size--;
}
int r = do_writev(fd, iov, offset, iovlen, bytes);
if (r < 0)
return r;
offset += bytes;
}
return 0;
}
void buffer::list::prepare_iov(std::vector<iovec> *piov) const
{
piov->resize(_buffers.size());

View File

@ -554,6 +554,7 @@ namespace buffer CEPH_BUFFER_API {
int read_fd_zero_copy(int fd, size_t len);
int write_file(const char *fn, int mode=0644);
int write_fd(int fd) const;
int write_fd(int fd, uint64_t offset) const;
int write_fd_zero_copy(int fd) const;
void prepare_iov(std::vector<iovec> *piov) const;
uint32_t crc32c(uint32_t crc) const;

View File

@ -123,6 +123,9 @@
/* Define to 1 if you have the `syncfs' function. */
#cmakedefine HAVE_SYNCFS 1
/* Define to 1 if you have the `pwritev' function. */
#cmakedefine HAVE_PWRITEV 1
/* sync_file_range(2) is supported */
#cmakedefine HAVE_SYNC_FILE_RANGE

View File

@ -3194,8 +3194,6 @@ int FileStore::_write(const coll_t& cid, const ghobject_t& oid,
dout(15) << "write " << cid << "/" << oid << " " << offset << "~" << len << dendl;
int r;
int64_t actual;
FDRef fd;
r = lfn_open(cid, oid, true, &fd);
if (r < 0) {
@ -3205,23 +3203,8 @@ int FileStore::_write(const coll_t& cid, const ghobject_t& oid,
goto out;
}
// seek
actual = ::lseek64(**fd, offset, SEEK_SET);
if (actual < 0) {
r = -errno;
dout(0) << "write lseek64 to " << offset << " failed: " << cpp_strerror(r) << dendl;
lfn_close(fd);
goto out;
}
if (actual != (int64_t)offset) {
dout(0) << "write lseek64 to " << offset << " gave bad offset " << actual << dendl;
r = -EIO;
lfn_close(fd);
goto out;
}
// write
r = bl.write_fd(**fd);
r = bl.write_fd(**fd, offset);
if (r == 0)
r = bl.length();

View File

@ -2256,6 +2256,24 @@ TEST(BufferList, write_fd) {
::unlink(FILENAME);
}
TEST(BufferList, write_fd_offset) {
::unlink(FILENAME);
int fd = ::open(FILENAME, O_WRONLY|O_CREAT|O_TRUNC, 0600);
bufferlist bl;
for (unsigned i = 0; i < IOV_MAX * 2; i++) {
bufferptr ptr("A", 1);
bl.push_back(ptr);
}
uint64_t offset = 200;
EXPECT_EQ(0, bl.write_fd(fd, offset));
::close(fd);
struct stat st;
memset(&st, 0, sizeof(st));
::stat(FILENAME, &st);
EXPECT_EQ(IOV_MAX * 2 + offset, st.st_size);
::unlink(FILENAME);
}
TEST(BufferList, crc32c) {
bufferlist bl;
__u32 crc = 0;