From e561e98e5cca2678854e01c990f95e474022b7ed Mon Sep 17 00:00:00 2001 From: Matt Benjamin Date: Tue, 1 Dec 2020 07:53:56 -0500 Subject: [PATCH] rgw_file: return common_prefixes in lexical order Since inception RGWReaddirRequest has sent all leaf objects first (in lexical order), then common_prefixes (in lexical order). In hindsight, an overall listing could trivially be returned out of lexical order, which can cause continued listing of large, mixed directories to fail. RCA by Dan Gryniewicz. Fixes: https://tracker.ceph.com/issues/48410 Signed-off-by: Matt Benjamin --- src/rgw/rgw_file.h | 251 +++++++++++++++++++++++++++++++-------------- 1 file changed, 174 insertions(+), 77 deletions(-) diff --git a/src/rgw/rgw_file.h b/src/rgw/rgw_file.h index 3dde51a9bc7..50a8c4fec1f 100644 --- a/src/rgw/rgw_file.h +++ b/src/rgw/rgw_file.h @@ -1553,94 +1553,191 @@ public: void send_response() override { struct req_state* state = get_state(); - for (const auto& iter : objs) { - - std::string_view sref {iter.key.name}; - - lsubdout(cct, rgw, 15) << "readdir objects prefix: " << prefix - << " obj: " << sref << dendl; - - size_t last_del = sref.find_last_of('/'); - if (last_del != string::npos) - sref.remove_prefix(last_del+1); - - /* leaf directory? */ - if (sref.empty()) - continue; - - lsubdout(cct, rgw, 15) << "RGWReaddirRequest " - << __func__ << " " - << "list uri=" << state->relative_uri << " " - << " prefix=" << prefix << " " - << " obj path=" << iter.key.name - << " (" << sref << ")" << "" - << " mtime=" - << real_clock::to_time_t(iter.meta.mtime) - << " size=" << iter.meta.accounted_size - << dendl; - - if (! this->operator()(sref, next_marker, iter.meta.mtime, - iter.meta.accounted_size, RGW_FS_TYPE_FILE)) { - /* caller cannot accept more */ - lsubdout(cct, rgw, 5) << "readdir rcb failed" - << " dirent=" << sref.data() - << " call count=" << ix - << dendl; - rcb_eof = true; - return; - } - ++ix; - } - auto cnow = real_clock::now(); - for (auto& iter : common_prefixes) { - lsubdout(cct, rgw, 15) << "readdir common prefixes prefix: " << prefix - << " iter first: " << iter.first - << " iter second: " << iter.second - << dendl; + /* enumerate objs and common_prefixes in parallel, + * avoiding increment on and end iterator, which is + * undefined */ - /* XXX aieee--I have seen this case! */ - if (iter.first == "/") - continue; + class DirIterator + { + vector& objs; + vector::iterator obj_iter; - /* it's safest to modify the element in place--a suffix-modifying - * string_ref operation is problematic since ULP rgw_file callers - * will ultimately need a c-string */ - if (iter.first.back() == '/') - const_cast(iter.first).pop_back(); + map& common_prefixes; + map::iterator cp_iter; - std::string_view sref{iter.first}; + boost::optional obj_sref; + boost::optional cp_sref; + bool _skip_cp; - size_t last_del = sref.find_last_of('/'); - if (last_del != string::npos) - sref.remove_prefix(last_del+1); + public: - lsubdout(cct, rgw, 15) << "RGWReaddirRequest " - << __func__ << " " - << "list uri=" << state->relative_uri << " " - << " prefix=" << prefix << " " - << " cpref=" << sref - << dendl; + DirIterator(vector& objs, + map& common_prefixes) + : objs(objs), common_prefixes(common_prefixes), _skip_cp(false) + { + obj_iter = objs.begin(); + parse_obj(); + cp_iter = common_prefixes.begin(); + parse_cp(); + } - if (sref.empty()) { - /* null path segment--could be created in S3 but has no NFS - * interpretation */ - return; + bool is_obj() { + return (obj_iter != objs.end()); } - if (! this->operator()(sref, next_marker, cnow, 0, - RGW_FS_TYPE_DIRECTORY)) { - /* caller cannot accept more */ - lsubdout(cct, rgw, 5) << "readdir rcb failed" - << " dirent=" << sref.data() - << " call count=" << ix - << dendl; - rcb_eof = true; - return; + bool is_cp(){ + return (cp_iter != common_prefixes.end()); } - ++ix; - } + + bool eof() { + return ((!is_obj()) && (!is_cp())); + } + + void parse_obj() { + if (is_obj()) { + std::string_view sref{obj_iter->key.name}; + size_t last_del = sref.find_last_of('/'); + if (last_del != string::npos) + sref.remove_prefix(last_del+1); + obj_sref = sref; + } + } /* parse_obj */ + + void next_obj() { + ++obj_iter; + parse_obj(); + } + + void parse_cp() { + if (is_cp()) { + /* leading-/ skip case */ + if (cp_iter->first == "/") { + _skip_cp = true; + return; + } else + _skip_cp = false; + + /* it's safest to modify the element in place--a suffix-modifying + * string_ref operation is problematic since ULP rgw_file callers + * will ultimately need a c-string */ + if (cp_iter->first.back() == '/') + const_cast(cp_iter->first).pop_back(); + + std::string_view sref{cp_iter->first}; + size_t last_del = sref.find_last_of('/'); + if (last_del != string::npos) + sref.remove_prefix(last_del+1); + cp_sref = sref; + } /* is_cp */ + } /* parse_cp */ + + void next_cp() { + ++cp_iter; + parse_cp(); + } + + bool skip_cp() { + return _skip_cp; + } + + bool entry_is_obj() { + return (is_obj() && + ((! is_cp()) || + (obj_sref.get() < cp_sref.get()))); + } + + std::string_view get_obj_sref() { + return obj_sref.get(); + } + + std::string_view get_cp_sref() { + return cp_sref.get(); + } + + vector::iterator& get_obj_iter() { + return obj_iter; + } + + map::iterator& get_cp_iter() { + return cp_iter; + } + + }; /* DirIterator */ + + DirIterator di{objs, common_prefixes}; + + for (;;) { + + if (di.eof()) { + break; // done + } + + /* assert: one of is_obj() || is_cp() holds */ + if (di.entry_is_obj()) { + auto sref = di.get_obj_sref(); + if (sref.empty()) { + /* recursive list of a leaf dir (iirc), do nothing */ + } else { + /* send a file entry */ + auto obj_entry = *(di.get_obj_iter()); + + lsubdout(cct, rgw, 15) << "RGWReaddirRequest " + << __func__ << " " + << "list uri=" << state->relative_uri << " " + << " prefix=" << prefix << " " + << " obj path=" << obj_entry.key.name + << " (" << sref << ")" << "" + << " mtime=" + << real_clock::to_time_t(obj_entry.meta.mtime) + << " size=" << obj_entry.meta.accounted_size + << dendl; + + if (! this->operator()(sref, next_marker, obj_entry.meta.mtime, + obj_entry.meta.accounted_size, + RGW_FS_TYPE_FILE)) { + /* caller cannot accept more */ + lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop" + << " dirent=" << sref.data() + << " call count=" << ix + << dendl; + rcb_eof = true; + return; + } + } + di.next_obj(); // and advance object + } else { + /* send a dir entry */ + if (! di.skip_cp()) { + auto sref = di.get_cp_sref(); + + lsubdout(cct, rgw, 15) << "RGWReaddirRequest " + << __func__ << " " + << "list uri=" << state->relative_uri << " " + << " prefix=" << prefix << " " + << " cpref=" << sref + << dendl; + + if (sref.empty()) { + /* null path segment--could be created in S3 but has no NFS + * interpretation */ + } else { + if (! this->operator()(sref, next_marker, cnow, 0, + RGW_FS_TYPE_DIRECTORY)) { + /* caller cannot accept more */ + lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop" + << " dirent=" << sref.data() + << " call count=" << ix + << dendl; + rcb_eof = true; + return; + } + } + } + di.next_cp(); // and advance common_prefixes + } /* ! di.entry_is_obj() */ + } /* for (;;) */ } virtual void send_versioned_response() {