From c71b521b8ab5d04bb4a1289a37d1fe30d2ca7f53 Mon Sep 17 00:00:00 2001 From: Matt Benjamin Date: Fri, 21 Nov 2014 09:41:38 -0800 Subject: [PATCH 1/4] Add "strong" claim to buffer::list and buffer::ptr. A strong claim is almost equivalent to claim, but deep copies volatile buffers to new raw_char buffers. A volatile buffer is one which cannot be held/claimed for long periods due to external dependencies (e.g., Accelio message buffers are volatile). Strong claim is now the unmarked case, so is performed automatically in the buffer::list copy constructor. Low-level operations should allow explicit volatile sharing, so copying a buffer::list:iterator or a buffer::ptr works as normal. For explicit sharing of volatile buffers, a new boolean is added to the public claim_* methods, and a buffer::list::share method is also added, to share an entire sequence. Signed-off-by: Vu Pham Signed-off-by: Matt Benjamin Signed-off-by: Matt Benjamin Conflicts: src/include/buffer.h Signed-off-by: Matt Benjamin Conflicts: src/common/buffer.cc --- src/common/buffer.cc | 32 ++++++++++++++++++++++++++------ src/include/buffer.h | 38 ++++++++++++++++++++++++++++++++------ src/msg/Message.h | 4 ++-- 3 files changed, 60 insertions(+), 14 deletions(-) diff --git a/src/common/buffer.cc b/src/common/buffer.cc index f50d103e47..05850a5589 100644 --- a/src/common/buffer.cc +++ b/src/common/buffer.cc @@ -161,6 +161,10 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER; bool is_n_page_sized() { return (len & ~CEPH_PAGE_MASK) == 0; } + virtual bool is_volatile() { + // true if unsafe to claim-hold due to, e.g., special registration + return false; + } bool get_crc(const pair &fromto, pair *crc) const { Mutex::Locker l(crc_lock); @@ -602,6 +606,18 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER; return _raw->clone(); } + buffer::ptr& buffer::ptr::strong_claim() { + if (_raw && _raw->is_volatile()) { + buffer::raw *tr = _raw; + _raw = tr->clone(); + _raw->nref.set(1); + if (unlikely(tr->nref.dec() == 0)) { + delete tr; + } + } + return *this; + } + void buffer::ptr::swap(ptr& other) { raw *r = _raw; @@ -1164,27 +1180,31 @@ void buffer::list::rebuild_page_aligned() } // sort-of-like-assignment-op - void buffer::list::claim(list& bl) + void buffer::list::claim(list& bl, bool strong) { // free my buffers clear(); - claim_append(bl); + claim_append(bl, strong); } - void buffer::list::claim_append(list& bl) + void buffer::list::claim_append(list& bl, bool strong) { // steal the other guy's buffers _len += bl._len; - _buffers.splice( _buffers.end(), bl._buffers ); + if (strong) + bl.strong_claim_inplace(); + _buffers.splice(_buffers.end(), bl._buffers ); bl._len = 0; bl.last_p = bl.begin(); } - void buffer::list::claim_prepend(list& bl) + void buffer::list::claim_prepend(list& bl, bool strong) { // steal the other guy's buffers _len += bl._len; - _buffers.splice( _buffers.begin(), bl._buffers ); + if (strong) + bl.strong_claim_inplace(); + _buffers.splice(_buffers.begin(), bl._buffers ); bl._len = 0; bl.last_p = bl.begin(); } diff --git a/src/include/buffer.h b/src/include/buffer.h index 0c917302f6..9d23e24c4e 100644 --- a/src/include/buffer.h +++ b/src/include/buffer.h @@ -173,6 +173,7 @@ class buffer { raw *clone(); void swap(ptr& other); + ptr& strong_claim(); // misc bool at_buffer_head() const { return _off == 0; } @@ -323,12 +324,17 @@ class buffer { append_buffer.set_length(0); // unused, so far. } ~list() {} - - list(const list& other) : _buffers(other._buffers), _len(other._len), _memcopy_count(other._memcopy_count),last_p(this) { } + list(const list& other) : _buffers(other._buffers), _len(other._len), + _memcopy_count(other._memcopy_count), last_p(this) { + // make strong-claim semantics the unmarked case + strong_claim_inplace(); + } list& operator= (const list& other) { if (this != &other) { _buffers = other._buffers; _len = other._len; + // make strong-claim semantics the unmarked case + strong_claim_inplace(); } return *this; } @@ -396,10 +402,30 @@ class buffer { void rebuild_aligned(unsigned align); void rebuild_page_aligned(); - // sort-of-like-assignment-op - void claim(list& bl); - void claim_append(list& bl); - void claim_prepend(list& bl); + // assignment-op with move semantics + void claim(list& bl, bool strong=true); + void claim_append(list& bl, bool strong=true); + void claim_prepend(list& bl, bool strong=true); + + // non-destructively replace volatile buffers + void strong_claim_inplace() { + std::list::iterator pb; + for (pb = _buffers.begin(); pb != _buffers.end(); ++pb) { + (void) pb->strong_claim(); + } + } + + // copy with explicit volatile-sharing semantics + void share(list& bl) + { + if (this != &bl) { + clear(); + std::list::iterator pb; + for (pb = bl._buffers.begin(); pb != bl._buffers.end(); ++pb) { + push_back(*pb); + } + } + } iterator begin() { return iterator(this, 0); diff --git a/src/msg/Message.h b/src/msg/Message.h index cb47058d0c..66f225688c 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -337,10 +337,10 @@ class Message : public RefCountedObject { } bufferlist& get_data() { return data; } - void claim_data(bufferlist& bl) { + void claim_data(bufferlist& bl, bool strong=true) { if (byte_throttler) byte_throttler->put(data.length()); - bl.claim(data); + bl.claim(data, strong); } off_t get_data_len() { return data.length(); } From 69d9ee43f6dd3622a71f7b8d336575546c0f11cf Mon Sep 17 00:00:00 2001 From: Matt Benjamin Date: Wed, 24 Sep 2014 11:06:05 -0500 Subject: [PATCH 2/4] Restore zero-copy buffers in OSD fast path. This change restores volatile sharing semantics in the Message decode path, and also in the OSD write path for FileStore/FileJournal. This can be verified with a breakpoint set at the clone/COW case in buffer::ptr::strong_claim (currently buffer.cc:690). Signed-off-by: Matt Benjamin Signed-off-by: Matt Benjamin --- src/msg/Message.h | 8 ++++---- src/os/FileJournal.cc | 2 +- src/os/FileJournal.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/msg/Message.h b/src/msg/Message.h index 66f225688c..c9e6835b9b 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -314,7 +314,7 @@ class Message : public RefCountedObject { void set_payload(bufferlist& bl) { if (byte_throttler) byte_throttler->put(payload.length()); - payload.claim(bl); + payload.claim(bl, false /* !strong */); if (byte_throttler) byte_throttler->take(payload.length()); } @@ -322,16 +322,16 @@ class Message : public RefCountedObject { void set_middle(bufferlist& bl) { if (byte_throttler) byte_throttler->put(payload.length()); - middle.claim(bl); + middle.claim(bl, false /* !strong */); if (byte_throttler) byte_throttler->take(payload.length()); } bufferlist& get_middle() { return middle; } - void set_data(const bufferlist &d) { + void set_data(bufferlist &bl) { if (byte_throttler) byte_throttler->put(data.length()); - data = d; + data.claim(bl, false /* !strong */); if (byte_throttler) byte_throttler->take(data.length()); } diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc index 388190f188..036db0999f 100644 --- a/src/os/FileJournal.cc +++ b/src/os/FileJournal.cc @@ -913,7 +913,7 @@ int FileJournal::prepare_single_write(bufferlist& bl, off64_t& queue_pos, uint64 bufferptr bp = buffer::create_static(pre_pad, zero_buf); bl.push_back(bp); } - bl.claim_append(ebl); + bl.claim_append(ebl, false /* ! strong */); // potential zero-copy if (h.post_pad) { bufferptr bp = buffer::create_static(post_pad, zero_buf); diff --git a/src/os/FileJournal.h b/src/os/FileJournal.h index 878b9094b6..8ae75cde43 100644 --- a/src/os/FileJournal.h +++ b/src/os/FileJournal.h @@ -54,7 +54,7 @@ class FileJournal : public Journal { TrackedOpRef tracked_op; write_item(uint64_t s, bufferlist& b, int al, TrackedOpRef opref) : seq(s), alignment(al), tracked_op(opref) { - bl.claim(b); + bl.claim(b, false /* !strong */); // potential zero-copy } write_item() : seq(0), alignment(0) {} }; From 9c8cc9905e337af71ddc1d1310dd0c329ca9252d Mon Sep 17 00:00:00 2001 From: Matt Benjamin Date: Mon, 1 Dec 2014 14:31:22 -0500 Subject: [PATCH 3/4] Message::set_data cannot preserve constness. Signed-off-by: Matt Benjamin --- src/client/Client.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 6b43ccce38..0e8ee2d375 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -4302,7 +4302,7 @@ int Client::mds_command( // Construct and send MCommand MCommand *m = new MCommand(monclient->get_fsid()); m->cmd = cmd; - m->set_data(inbl); + m->set_data(const_cast(inbl)); m->set_tid(tid); conn->send_message(m); } From 532b8401fadc48bbe938756992428e3e287c9d5a Mon Sep 17 00:00:00 2001 From: Matt Benjamin Date: Tue, 2 Dec 2014 18:33:58 -0500 Subject: [PATCH 4/4] Include common/likely.h in buffer.h Signed-off-by: Matt Benjamin --- src/common/buffer.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/buffer.cc b/src/common/buffer.cc index 05850a5589..04c7c64a0c 100644 --- a/src/common/buffer.cc +++ b/src/common/buffer.cc @@ -19,6 +19,7 @@ #include "common/safe_io.h" #include "common/simple_spin.h" #include "common/strtol.h" +#include "common/likely.h" #include "include/atomic.h" #include "common/Mutex.h" #include "include/types.h"