From 1a69bec52f0215cea24f49945c42db00fa317395 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 17 Apr 2017 08:52:12 -0400 Subject: [PATCH] client: refine fsync/close writeback error handling Previously, errors stuck indelibly to the inode, which meant that a close call would see an error even if the user already dutifully fsync()'d and handled it. We should emit each error only once per file handle. Signed-off-by: John Spray --- qa/tasks/cephfs/test_full.py | 32 +++++++++++++----------- src/client/CMakeLists.txt | 1 + src/client/Client.cc | 48 ++++++++++++++++++++++-------------- src/client/Fh.cc | 32 ++++++++++++++++++++++++ src/client/Fh.h | 19 ++++++++++++-- src/client/Inode.cc | 9 +++++++ src/client/Inode.h | 12 +++++---- 7 files changed, 114 insertions(+), 39 deletions(-) create mode 100644 src/client/Fh.cc diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py index a07ceccf0b8..ee667723362 100644 --- a/qa/tasks/cephfs/test_full.py +++ b/qa/tasks/cephfs/test_full.py @@ -236,7 +236,8 @@ class FullnessTestCase(CephFSTestCase): self.mount_a.run_python(template.format( fill_mb=self.fill_mb, file_path=file_path, - full_wait=full_wait + full_wait=full_wait, + is_fuse=isinstance(self.mount_a, FuseMount) )) def test_full_fclose(self): @@ -285,13 +286,17 @@ class FullnessTestCase(CephFSTestCase): # Wait long enough for a background flush that should fail time.sleep(30) - # ...and check that the failed background flush is reflected in fclose - try: - os.close(f) - except OSError: - print "close() returned an error as expected" + if {is_fuse}: + # ...and check that the failed background flush is reflected in fclose + try: + os.close(f) + except OSError: + print "close() returned an error as expected" + else: + raise RuntimeError("close() failed to raise error") else: - raise RuntimeError("close() failed to raise error") + # The kernel cephfs client does not raise errors on fclose + os.close(f) os.unlink("{file_path}") """) @@ -353,13 +358,12 @@ class FullnessTestCase(CephFSTestCase): if not full: raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes) - # The error sticks to the inode until we dispose of it - try: - os.close(f) - except OSError: - print "Saw error from close() as expected" - else: - raise RuntimeError("Did not see expected error from close()") + # close() should not raise an error because we already caught it in + # fsync. There shouldn't have been any more writeback errors + # since then because all IOs got cancelled on the full flag. + print "calling close" + os.close(f) + print "close() did not raise error" os.unlink("{file_path}") """) diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 3ee5a74a09f..4d182d8082e 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -1,6 +1,7 @@ set(libclient_srcs Client.cc Dentry.cc + Fh.cc Inode.cc MetaRequest.cc ClientSnapRealm.cc diff --git a/src/client/Client.cc b/src/client/Client.cc index 686577dcb43..a2f97db9b99 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2391,7 +2391,7 @@ void Client::_handle_full_flag(int64_t pool) ldout(cct, 4) << __func__ << ": FULL: inode 0x" << std::hex << i->first << std::dec << " has dirty objects, purging and setting ENOSPC" << dendl; objectcacher->purge_set(&inode->oset); - inode->async_err = -ENOSPC; + inode->set_async_err(-ENOSPC); } } @@ -3002,7 +3002,7 @@ public: ldout(client->cct, 1) << "I/O error from flush on inode " << inode << " 0x" << std::hex << inode->ino << std::dec << ": " << r << "(" << cpp_strerror(r) << ")" << dendl; - inode->async_err = r; + inode->set_async_err(r); } } }; @@ -8089,14 +8089,12 @@ int Client::lookup_name(Inode *ino, Inode *parent, const UserPerm& perms) Fh *Client::_create_fh(Inode *in, int flags, int cmode, const UserPerm& perms) { - // yay - Fh *f = new Fh; + assert(in); + Fh *f = new Fh(in); f->mode = cmode; f->flags = flags; // inode - assert(in); - f->inode = in; f->actor_perms = perms; ldout(cct, 10) << "_create_fh " << in->ino << " mode " << cmode << dendl; @@ -8145,8 +8143,8 @@ int Client::_release_fh(Fh *f) _release_filelocks(f); - // Finally, read any async err (i.e. from flushes) from the inode - int err = in->async_err; + // Finally, read any async err (i.e. from flushes) + int err = f->take_async_err(); if (err != 0) { ldout(cct, 1) << "_release_fh " << f << " on inode " << *in << " caught async_err = " << cpp_strerror(err) << dendl; @@ -8162,8 +8160,9 @@ int Client::_release_fh(Fh *f) void Client::_put_fh(Fh *f) { int left = f->put(); - if (!left) + if (!left) { delete f; + } } int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp, @@ -9047,7 +9046,7 @@ done: int Client::_flush(Fh *f) { Inode *in = f->inode.get(); - int err = in->async_err; + int err = f->take_async_err(); if (err != 0) { ldout(cct, 1) << __func__ << ": " << f << " on inode " << *in << " caught async_err = " << cpp_strerror(err) << dendl; @@ -9099,7 +9098,21 @@ int Client::fsync(int fd, bool syncdataonly) return -EBADF; #endif int r = _fsync(f, syncdataonly); - ldout(cct, 3) << "fsync(" << fd << ", " << syncdataonly << ") = " << r << dendl; + if (r == 0) { + // The IOs in this fsync were okay, but maybe something happened + // in the background that we shoudl be reporting? + r = f->take_async_err(); + ldout(cct, 3) << "fsync(" << fd << ", " << syncdataonly + << ") = 0, async_err = " << r << dendl; + } else { + // Assume that an error we encountered during fsync, even reported + // synchronously, would also have applied the error to the Fh, and we + // should clear it here to avoid returning the same error again on next + // call. + ldout(cct, 3) << "fsync(" << fd << ", " << syncdataonly << ") = " + << r << dendl; + f->take_async_err(); + } return r; } @@ -9165,12 +9178,6 @@ int Client::_fsync(Inode *in, bool syncdataonly) << cpp_strerror(-r) << dendl; } - if (in->async_err) { - ldout(cct, 1) << "ino " << in->ino << " marked with error from background flush! " - << cpp_strerror(in->async_err) << dendl; - r = in->async_err; - } - return r; } @@ -12249,7 +12256,12 @@ int Client::ll_fsync(Fh *fh, bool syncdataonly) tout(cct) << "ll_fsync" << std::endl; tout(cct) << (unsigned long)fh << std::endl; - return _fsync(fh, syncdataonly); + int r = _fsync(fh, syncdataonly); + if (r) { + // If we're returning an error, clear it from the FH + fh->take_async_err(); + } + return r; } #ifdef FALLOC_FL_PUNCH_HOLE diff --git a/src/client/Fh.cc b/src/client/Fh.cc new file mode 100644 index 00000000000..e42a9d4128e --- /dev/null +++ b/src/client/Fh.cc @@ -0,0 +1,32 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 Red Hat Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#include "Inode.h" + +#include "Fh.h" + +Fh::Fh(Inode *in) : inode(in), _ref(1), pos(0), mds(0), mode(0), flags(0), + pos_locked(false), actor_perms(), readahead(), + fcntl_locks(NULL), flock_locks(NULL) +{ + inode->add_fh(this); +} + +Fh::~Fh() +{ + inode->rm_fh(this); +} + diff --git a/src/client/Fh.h b/src/client/Fh.h index 170d0e70301..33733a5e1c1 100644 --- a/src/client/Fh.h +++ b/src/client/Fh.h @@ -4,9 +4,11 @@ #include "common/Readahead.h" #include "include/types.h" #include "InodeRef.h" +#include "UserPerm.h" class Cond; class ceph_lock_state_t; +class Inode; // file handle for any open file state @@ -28,9 +30,22 @@ struct Fh { // file lock ceph_lock_state_t *fcntl_locks; ceph_lock_state_t *flock_locks; + + // IO error encountered by any writeback on this Inode while + // this Fh existed (i.e. an fsync on another Fh will still show + // up as an async_err here because it could have been the same + // bytes we wrote via this Fh). + int async_err = {0}; + + int take_async_err() + { + int e = async_err; + async_err = 0; + return e; + } - Fh() : _ref(1), pos(0), mds(0), mode(0), flags(0), pos_locked(false), - actor_perms(), readahead(), fcntl_locks(NULL), flock_locks(NULL) {} + Fh(Inode *in); + ~Fh(); void get() { ++_ref; } int put() { return --_ref; } }; diff --git a/src/client/Inode.cc b/src/client/Inode.cc index f4103068053..bb1601a7220 100644 --- a/src/client/Inode.cc +++ b/src/client/Inode.cc @@ -5,6 +5,7 @@ #include "Inode.h" #include "Dentry.h" #include "Dir.h" +#include "Fh.h" #include "MetaSession.h" #include "ClientSnapRealm.h" @@ -546,3 +547,11 @@ void CapSnap::dump(Formatter *f) const f->dump_int("dirty_data", (int)dirty_data); f->dump_unsigned("flush_tid", flush_tid); } + +void Inode::set_async_err(int r) +{ + for (const auto &fh : fhs) { + fh->async_err = r; + } +} + diff --git a/src/client/Inode.h b/src/client/Inode.h index 5ea3302f8d0..e8ce367aa83 100644 --- a/src/client/Inode.h +++ b/src/client/Inode.h @@ -24,6 +24,7 @@ struct Inode; class ceph_lock_state_t; class MetaRequest; class filepath; +class Fh; struct Cap { MetaSession *session; @@ -227,6 +228,8 @@ struct Inode { xlist unsafe_ops; + std::set fhs; + Inode(Client *c, vinodeno_t vino, file_layout_t *newlayout) : client(c), ino(vino.ino), snapid(vino.snapid), faked_ino(0), rdev(0), mode(0), uid(0), gid(0), nlink(0), @@ -243,8 +246,7 @@ struct Inode { oset((void *)this, newlayout->pool_id, this->ino), reported_size(0), wanted_max_size(0), requested_max_size(0), _ref(0), ll_ref(0), dn_set(), - fcntl_locks(NULL), flock_locks(NULL), - async_err(0) + fcntl_locks(NULL), flock_locks(NULL) { memset(&dir_layout, 0, sizeof(dir_layout)); memset("a, 0, sizeof(quota)); @@ -286,9 +288,9 @@ struct Inode { bool have_valid_size(); Dir *open_dir(); - // Record errors to be exposed in fclose/fflush - int async_err; - + void add_fh(Fh *f) {fhs.insert(f);} + void rm_fh(Fh *f) {fhs.erase(f);} + void set_async_err(int r); void dump(Formatter *f) const; };