From db1643d4b12ac67e1fca5765ae80001450f96147 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Fri, 12 Jun 2015 14:56:41 +0800 Subject: [PATCH 1/4] osd/ReplicatedPG: For WRITEFULL replica object, only truncate if the new size is less than the old size (truncate only to the new size). If the new size is larger than or equal to the old size, there is no need to truncate; the write can directly overwrite. Signed-off-by: Jianpeng Ma --- src/osd/ReplicatedPG.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index bcaa72e972d..da47fe0a7dd 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -4267,10 +4267,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) } } else { ctx->mod_desc.mark_unrollbackable(); - if (obs.exists) { - t->truncate(soid, 0); - } t->write(soid, op.extent.offset, op.extent.length, osd_op.indata, op.flags); + if (obs.exists && op.extent.length < oi.size) { + t->truncate(soid, op.extent.length); + } } maybe_create_new_object(ctx); obs.oi.set_data_digest(osd_op.indata.crc32c(-1)); From a1005b11ae6053b51c09567d84a2bf7aa4bd1294 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Thu, 2 Jul 2015 16:50:41 +0800 Subject: [PATCH 2/4] osd/Replicated: First calc crc then call write_update_size_and_usage. Because write_update_size_and_usage changes oi.size, calling it before the digest update can prevent the new crc from being calculated. 
Signed-off-by: Jianpeng Ma --- src/osd/ReplicatedPG.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index da47fe0a7dd..da72ce7a17e 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -4211,8 +4211,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) } else { t->write(soid, op.extent.offset, op.extent.length, osd_op.indata, op.flags); } - write_update_size_and_usage(ctx->delta_stats, oi, ctx->modified_ranges, - op.extent.offset, op.extent.length, true); + maybe_create_new_object(ctx); if (op.extent.offset == 0 && op.extent.length >= oi.size) obs.oi.set_data_digest(osd_op.indata.crc32c(-1)); @@ -4220,6 +4219,9 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) obs.oi.set_data_digest(osd_op.indata.crc32c(obs.oi.data_digest)); else obs.oi.clear_data_digest(); + write_update_size_and_usage(ctx->delta_stats, oi, ctx->modified_ranges, + op.extent.offset, op.extent.length, true); + } break; From 89a7b12e1a6e3c51bf9f3de74425396ebd45983e Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Thu, 2 Jul 2015 16:59:10 +0800 Subject: [PATCH 3/4] osd/Replicated: Using write_update_size_and_usage for WRITEFULL. 
Signed-off-by: Jianpeng Ma --- src/osd/ReplicatedPG.cc | 22 ++++++++-------------- src/osd/ReplicatedPG.h | 3 ++- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index da72ce7a17e..7312c8efea0 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -4277,17 +4277,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) maybe_create_new_object(ctx); obs.oi.set_data_digest(osd_op.indata.crc32c(-1)); - interval_set ch; - if (oi.size > 0) - ch.insert(0, oi.size); - ctx->modified_ranges.union_of(ch); - if (op.extent.length + op.extent.offset != oi.size) { - ctx->delta_stats.num_bytes -= oi.size; - oi.size = op.extent.length + op.extent.offset; - ctx->delta_stats.num_bytes += oi.size; - } - ctx->delta_stats.num_wr++; - ctx->delta_stats.num_wr_kb += SHIFT_ROUND_UP(op.extent.length, 10); + write_update_size_and_usage(ctx->delta_stats, oi, ctx->modified_ranges, + op.extent.offset, op.extent.length, true, + op.extent.offset + op.extent.length != oi.size ? 
true : false); + } break; @@ -5527,15 +5520,16 @@ void ReplicatedPG::make_writeable(OpContext *ctx) void ReplicatedPG::write_update_size_and_usage(object_stat_sum_t& delta_stats, object_info_t& oi, interval_set& modified, uint64_t offset, - uint64_t length, bool count_bytes) + uint64_t length, bool count_bytes, bool force_changesize) { interval_set ch; if (length) ch.insert(offset, length); modified.union_of(ch); - if (length && (offset + length > oi.size)) { + if (force_changesize || offset + length > oi.size) { uint64_t new_size = offset + length; - delta_stats.num_bytes += new_size - oi.size; + delta_stats.num_bytes -= oi.size; + delta_stats.num_bytes += new_size; oi.size = new_size; } delta_stats.num_wr++; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index da2c7b3eee6..813b343d91c 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -1162,7 +1162,8 @@ protected: void write_update_size_and_usage(object_stat_sum_t& stats, object_info_t& oi, interval_set& modified, uint64_t offset, - uint64_t length, bool count_bytes); + uint64_t length, bool count_bytes, + bool force_changesize=false); void add_interval_usage(interval_set& s, object_stat_sum_t& st); /** From efccc58f1e5ea92ec23d6b808ff25782d2146280 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Thu, 2 Jul 2015 17:05:53 +0800 Subject: [PATCH 4/4] osd/ReplicatedPG: for writefull, offset is zero so replace offset w/ zero. 
Signed-off-by: Jianpeng Ma --- src/osd/ReplicatedPG.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 7312c8efea0..20fe86792e4 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -4228,13 +4228,13 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) case CEPH_OSD_OP_WRITEFULL: ++ctx->num_write; { // write full object - tracepoint(osd, do_osd_op_pre_writefull, soid.oid.name.c_str(), soid.snap.val, oi.size, op.extent.offset, op.extent.length); + tracepoint(osd, do_osd_op_pre_writefull, soid.oid.name.c_str(), soid.snap.val, oi.size, 0, op.extent.length); if (op.extent.length != osd_op.indata.length()) { result = -EINVAL; break; } - result = check_offset_and_length(op.extent.offset, op.extent.length, cct->_conf->osd_max_object_size); + result = check_offset_and_length(0, op.extent.length, cct->_conf->osd_max_object_size); if (result < 0) break; @@ -4250,7 +4250,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) } } ctx->mod_desc.create(); - t->append(soid, op.extent.offset, op.extent.length, osd_op.indata, op.flags); + t->append(soid, 0, op.extent.length, osd_op.indata, op.flags); if (obs.exists) { map to_set = ctx->obc->attr_cache; map > &overlay = @@ -4269,7 +4269,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) } } else { ctx->mod_desc.mark_unrollbackable(); - t->write(soid, op.extent.offset, op.extent.length, osd_op.indata, op.flags); + t->write(soid, 0, op.extent.length, osd_op.indata, op.flags); if (obs.exists && op.extent.length < oi.size) { t->truncate(soid, op.extent.length); } @@ -4278,9 +4278,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) obs.oi.set_data_digest(osd_op.indata.crc32c(-1)); write_update_size_and_usage(ctx->delta_stats, oi, ctx->modified_ranges, - op.extent.offset, op.extent.length, true, - op.extent.offset + op.extent.length != oi.size ? 
true : false); - + 0, op.extent.length, true, op.extent.length != oi.size ? true : false); } break;