diff --git a/brick.h b/brick.h
index fbbba151..d1d58a82 100644
--- a/brick.h
+++ b/brick.h
@@ -28,6 +28,7 @@ extern int _brick_msleep(int msecs, bool shorten);
 
 #define BRICK_FAT(_fmt, _args...) _BRICK_MSG(SAY_FATAL, true,  _fmt, ##_args)
 #define BRICK_ERR(_fmt, _args...) _BRICK_MSG(SAY_ERROR, true,  _fmt, ##_args)
+#define BRICK_DMP(_fmt, _args...) _BRICK_MSG(SAY_ERROR, false, _fmt, ##_args) /* error-level "dump" output: same severity as BRICK_ERR but passes false instead of true (like BRICK_WRN/BRICK_INF); used for bulk trace dumps, e.g. tatomic_out() */
 #define BRICK_WRN(_fmt, _args...) _BRICK_MSG(SAY_WARN,  false, _fmt, ##_args)
 #define BRICK_INF(_fmt, _args...) _BRICK_MSG(SAY_INFO,  false, _fmt, ##_args)
 
diff --git a/brick_atomic.h b/brick_atomic.h
new file mode 100644
index 00000000..7e6411fe
--- /dev/null
+++ b/brick_atomic.h
@@ -0,0 +1,83 @@
+// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
+#ifndef BRICK_ATOMIC_H
+#define BRICK_ATOMIC_H
+
+#include <linux/spinlock.h>
+
+#include <asm/atomic.h>
+
+#include "brick_checking.h"
+
+#define ATOMIC_DEBUGGING	/* when set, each tatomic op records its __LINE__ for post-mortem dumps */
+
+#ifndef CONFIG_MARS_DEBUG
+#undef  ATOMIC_DEBUGGING	/* tracing is compiled out in non-debug builds */
+#endif
+
+#define ATOMIC_MAXTRACE 64	/* at most this many call sites are recorded per trace */
+
+/* Trivial wrapper to force type checking.
+ */
+typedef struct tatomic {
+	atomic_t ta_atomic;	/* the wrapped counter; access only through the tatomic_*() macros */
+} tatomic_t;
+
+typedef struct atomic_trace {	/* zero-size (no overhead) unless ATOMIC_DEBUGGING is set */
+#ifdef ATOMIC_DEBUGGING
+	atomic_t at_count;	/* total number of traced ops; may exceed ATOMIC_MAXTRACE */
+	short at_lines[ATOMIC_MAXTRACE];	/* __LINE__ of the first ATOMIC_MAXTRACE ops */
+#endif
+} atomic_trace_t;
+
+#ifdef ATOMIC_DEBUGGING
+
+#define tatomic_trace(_at, _cmd) /* record caller's line in _at, then run _cmd; yields _cmd's value */ \
+	({								\
+		int _index = atomic_add_return(1, &(_at)->at_count) - 1; \
+		if (_index >= 0 && _index < ATOMIC_MAXTRACE) {	/* silently drop when trace is full */ \
+			(_at)->at_lines[_index] = __LINE__;		\
+		}							\
+		_cmd;	/* last expression => statement-expression value */ \
+	})
+
+#define tatomic_out(_at,_MSG) /* dump the recorded call sites via _MSG (e.g. BRICK_DMP) */ \
+	({								\
+		int __i;						\
+		int __max = atomic_read(&(_at)->at_count);		\
+		_MSG("at_count = %d\n", __max);				\
+		if (__max > ATOMIC_MAXTRACE)	/* counter may have run past the buffer */ \
+			__max = ATOMIC_MAXTRACE;			\
+		for (__i = 0; __i < __max; __i++) {			\
+			_MSG("%2d %4d\n", __i, (_at)->at_lines[__i]);	\
+		}							\
+	})
+
+#define _CHECK_TATOMIC(_at,_atom,OP,_minval) /* complain and dump trace if counter OP _minval holds */ \
+do {									\
+	if (BRICK_CHECKING) {						\
+		int __test = atomic_read(&(_atom)->ta_atomic);		\
+		if (__test OP (_minval)) {				\
+			atomic_set(&(_atom)->ta_atomic, _minval); /* clamp so we can limp on */ \
+			BRICK_ERR("%d: tatomic " #_atom " " #OP " " #_minval " (%d)\n", __LINE__, __test); \
+			tatomic_out(_at, BRICK_DMP);	/* show which call sites touched it */ \
+		}							\
+	}								\
+} while (0)
+
+#else
+
+#define tatomic_trace(_at,_cmd)  _cmd	/* tracing compiled out: just execute the op */
+
+#define _CHECK_TATOMIC(_at,_atom,OP,_minval) /* fall back to the plain atomic check, no trace */ \
+	_CHECK_ATOMIC(&(_atom)->ta_atomic, OP, _minval)
+
+#endif
+
+#define CHECK_TATOMIC(_at,_atom,_minval) /* assert the counter is >= _minval */ \
+	_CHECK_TATOMIC(_at, _atom, <, _minval)
+
+#define tatomic_inc(at,a)           tatomic_trace(at, atomic_inc(&(a)->ta_atomic))	/* traced counterparts of atomic_{inc,dec,dec_and_test}() */
+#define tatomic_dec(at,a)           tatomic_trace(at, atomic_dec(&(a)->ta_atomic))
+#define tatomic_dec_and_test(at,a)  tatomic_trace(at, atomic_dec_and_test(&(a)->ta_atomic))
+
+#endif
diff --git a/mars.h b/mars.h
index 2a5cfb4d..d8285e4c 100644
--- a/mars.h
+++ b/mars.h
@@ -16,6 +16,7 @@
 
 #include "brick.h"
 #include "brick_mem.h"
+#include "brick_atomic.h"
 #include "lib_timing.h"
 
 #define GFP_MARS GFP_BRICK
@@ -112,14 +113,46 @@ extern void mars_log_trace(struct mref_object *mref);
 	bool   ref_skip_sync; /* skip sync for this particular mref */	\
 	/* maintained by the ref implementation, incrementable for	\
 	 * callers (but not decrementable! use ref_put()) */		\
-	atomic_t ref_count;						\
+	bool   ref_initialized; /* internally used for checking */	\
+	tatomic_t ref_count;						\
 	/* internal */							\
+	atomic_trace_t ref_at;						\
 	TRACING_INFO;
 
 struct mref_object {
 	MREF_OBJECT(mref);
 };
 
+#define _mref_check(mref) /* sanity check: mref must be initialized with ref_count >= 1 */ \
+	({								\
+		if (unlikely(BRICK_CHECKING && !(mref)->ref_initialized)) { \
+			MARS_ERR("mref %p is not initialized\n", (mref)); \
+		}							\
+		CHECK_TATOMIC(&(mref)->ref_at, &(mref)->ref_count, 1);	\
+	})
+
+#define _mref_get_first(mref) /* take the very first reference; counter must still be 0 */ \
+	({								\
+		if (unlikely(BRICK_CHECKING && (mref)->ref_initialized)) { \
+			MARS_ERR("mref %p is already initialized\n", (mref)); \
+		}							\
+		_CHECK_TATOMIC(&(mref)->ref_at, &(mref)->ref_count, !=, 0); \
+		(mref)->ref_initialized = true;				\
+		tatomic_inc(&(mref)->ref_at, &(mref)->ref_count);	\
+	})
+
+#define _mref_get(mref) /* take an additional reference on an already-live mref */ \
+	({								\
+		_mref_check(mref);					\
+		tatomic_inc(&(mref)->ref_at, &(mref)->ref_count);	\
+	})
+
+#define _mref_put(mref) /* drop one reference; evaluates to true iff it was the last one */ \
+	({								\
+		_mref_check(mref);					\
+		tatomic_dec_and_test(&(mref)->ref_at, &(mref)->ref_count); \
+	})
+
 // internal helper structs
 
 struct mars_info {
diff --git a/mars_aio.c b/mars_aio.c
index 3417a879..5469859d 100644
--- a/mars_aio.c
+++ b/mars_aio.c
@@ -132,11 +132,11 @@ static int aio_ref_get(struct aio_output *output, struct mref_object *mref)
 {
 	struct file *file = output->filp;
 
-	if (atomic_read(&mref->ref_count) > 0) {
-		atomic_inc(&mref->ref_count);
+	if (mref->ref_initialized) {
+		_mref_get(mref);
 		return mref->ref_len;
 	}
-	
+
 	if (file) {
 		loff_t total_size = i_size_read(file->f_mapping->host);
 		mref->ref_total_size = total_size;
@@ -183,7 +183,7 @@ static int aio_ref_get(struct aio_output *output, struct mref_object *mref)
 		atomic_inc(&output->alloc_count);
 	}
 
-	atomic_inc(&mref->ref_count);
+	_mref_get_first(mref);
 	return mref->ref_len;
 }
 
@@ -192,8 +192,7 @@ static void aio_ref_put(struct aio_output *output, struct mref_object *mref)
 	struct file *file = output->filp;
 	struct aio_mref_aspect *mref_a;
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
-	if (!atomic_dec_and_test(&mref->ref_count)) {
+	if (!_mref_put(mref)) {
 		goto done;
 	}
 
@@ -213,6 +212,8 @@ static void aio_ref_put(struct aio_output *output, struct mref_object *mref)
 static
 void _complete(struct aio_output *output, struct mref_object *mref, int err)
 {
+	_mref_check(mref);
+
 	mars_trace(mref, "aio_endio");
 
 	if (err < 0) {
@@ -257,7 +258,7 @@ static void aio_ref_io(struct aio_output *output, struct mref_object *mref)
 	struct aio_mref_aspect *mref_a;
 	int err = -EINVAL;
 
-	atomic_inc(&mref->ref_count);
+	_mref_get(mref);
 	atomic_inc(&mars_global_io_flying);
 
 	// statistics
diff --git a/mars_bio.c b/mars_bio.c
index a7161882..00cac575 100644
--- a/mars_bio.c
+++ b/mars_bio.c
@@ -234,19 +234,23 @@ done:
 
 static int bio_ref_get(struct bio_output *output, struct mref_object *mref)
 {
-	struct bio_mref_aspect *mref_a = bio_mref_get_aspect(output->brick, mref);
+	struct bio_mref_aspect *mref_a;
 	int status = -EINVAL;
 
-	CHECK_PTR(mref_a, done);
+	CHECK_PTR(output, done);
 	CHECK_PTR(output->brick, done);
-	_CHECK_ATOMIC(&mref->ref_count, !=,  0);
 
-	if (mref_a->output)
-		goto ok;
+	if (mref->ref_initialized) {
+		_mref_get(mref);
+		return mref->ref_len;
+	}
 
+	mref_a = bio_mref_get_aspect(output->brick, mref);
+	CHECK_PTR(mref_a, done);
 	mref_a->output = output;
 	mref_a->bio = NULL;
 
+
 	if (!mref->ref_data) { // buffered IO.
 		status = -ENOMEM;
 		mref->ref_data = brick_block_alloc(mref->ref_pos, (mref_a->alloc_len = mref->ref_len));
@@ -270,8 +274,7 @@ static int bio_ref_get(struct bio_output *output, struct mref_object *mref)
 	MARS_IO("len = %d status = %d prio = %d fly = %d\n", mref->ref_len, status, mref->ref_prio, atomic_read(&output->brick->fly_count[PRIO_INDEX(mref)]));
 
 	mref->ref_len = status;
-ok:
-	atomic_inc(&mref->ref_count);
+	_mref_get_first(mref);
 	status = 0;
 
 done:
@@ -283,8 +286,7 @@ void bio_ref_put(struct bio_output *output, struct mref_object *mref)
 {
 	struct bio_mref_aspect *mref_a;
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
-	if (!atomic_dec_and_test(&mref->ref_count)) {
+	if (!_mref_put(mref)) {
 		goto done;
 	}
 
@@ -334,8 +336,7 @@ void _bio_ref_io(struct bio_output *output, struct mref_object *mref, bool cork)
 	bio = mref_a->bio;
 	CHECK_PTR(bio, err);
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
-	atomic_inc(&mref->ref_count);
+	_mref_get(mref);
 	atomic_inc(&brick->fly_count[PRIO_INDEX(mref)]);
 
 	bio_get(bio);
@@ -409,7 +410,7 @@ void bio_ref_io(struct bio_output *output, struct mref_object *mref)
 		struct bio_brick *brick = output->brick;
 		unsigned long flags;
 
-		atomic_inc(&mref->ref_count);
+		_mref_get(mref);
 
 		spin_lock_irqsave(&brick->lock, flags);
 		list_add_tail(&mref_a->io_head, &brick->queue_list[PRIO_INDEX(mref)]);
diff --git a/mars_buf.c b/mars_buf.c
index 55579ed8..20312a7d 100644
--- a/mars_buf.c
+++ b/mars_buf.c
@@ -424,8 +424,7 @@ static int buf_ref_get(struct buf_output *output, struct mref_object *mref)
 #endif
 	/* Grab reference.
 	 */
-	_CHECK_ATOMIC(&mref->ref_count, !=, 0);
-	atomic_inc(&mref->ref_count);
+	_mref_get(mref);
 
 	/* shortcut in case of unbuffered IO
 	 */
@@ -535,7 +534,7 @@ again:
 	mref->ref_flags = bf->bf_flags;
 	mref->ref_data = bf->bf_data + base_offset;
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
+	_mref_check(mref);
 	CHECK_ATOMIC(&bf->bf_hash_count, 1);
 	CHECK_ATOMIC(&bf->bf_mref_count, 1);
 
@@ -559,9 +558,7 @@ static void _buf_ref_put(struct buf_output *output, struct buf_mref_aspect *mref
 		return;
 	}
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
-
-	if (!atomic_dec_and_test(&mref->ref_count))
+	if (!_mref_put(mref))
 		return;
 
 	MARS_DBG("buf_ref_put() mref=%p mref_a=%p bf=%p flags=%d\n", mref, mref_a, bf, bf->bf_flags);
@@ -802,7 +799,7 @@ static void _buf_endio(struct generic_callback *cb)
 			MARS_ERR("endless loop 2\n");
 		}
 #endif
-		CHECK_ATOMIC(&mref->ref_count, 1);
+		_mref_check(mref);
 		/* It should be safe to do this without locking, because
 		 * tmp is on the stack, so there is no concurrency.
 		 */
@@ -863,8 +860,7 @@ static void buf_ref_io(struct buf_output *output, struct mref_object *mref)
 	 * This will be released later in _bf_endio() after
 	 * calling the callbacks.
 	 */
-	CHECK_ATOMIC(&mref->ref_count, 1);
-	atomic_inc(&mref->ref_count);
+	_mref_get(mref);
 	CHECK_ATOMIC(&bf->bf_hash_count, 1);
 
 	MARS_DBG("IO mref=%p rw=%d bf=%p flags=%d\n", mref, mref->ref_rw, bf, bf->bf_flags);
diff --git a/mars_client.c b/mars_client.c
index 4c7cfd9e..2e2f8493 100644
--- a/mars_client.c
+++ b/mars_client.c
@@ -148,18 +148,24 @@ static int client_get_info(struct client_output *output, struct mars_info *info)
 
 static int client_ref_get(struct client_output *output, struct mref_object *mref)
 {
+	int maxlen;
+
+	if (mref->ref_initialized) {
+		_mref_get(mref);
+		return mref->ref_len;
+	}
+
 #if 1
 	/* Limit transfers to page boundaries.
 	 * Currently, this is more restrictive than necessary.
 	 * TODO: improve performance by doing better when possible.
 	 * This needs help from the server in some efficient way.
 	 */
-	int maxlen = PAGE_SIZE - (mref->ref_pos & (PAGE_SIZE-1));
+	maxlen = PAGE_SIZE - (mref->ref_pos & (PAGE_SIZE-1));
 	if (mref->ref_len > maxlen)
 		mref->ref_len = maxlen;
 #endif
 
-	_CHECK_ATOMIC(&mref->ref_count, !=,  0);
 	if (!mref->ref_data) { // buffered IO
 		struct client_mref_aspect *mref_a = client_mref_get_aspect(output->brick, mref);
 		if (!mref_a)
@@ -173,15 +179,14 @@ static int client_ref_get(struct client_output *output, struct mref_object *mref
 		mref->ref_flags = 0;
 	}
 
-	atomic_inc(&mref->ref_count);
+	_mref_get_first(mref);
 	return 0;
 }
 
 static void client_ref_put(struct client_output *output, struct mref_object *mref)
 {
 	struct client_mref_aspect *mref_a;
-	CHECK_ATOMIC(&mref->ref_count, 1);
-	if (!atomic_dec_and_test(&mref->ref_count))
+	if (!_mref_put(mref))
 		return;
 	mref_a = client_mref_get_aspect(output->brick, mref);
 	if (mref_a && mref_a->do_dealloc) {
@@ -213,7 +218,7 @@ static void client_ref_io(struct client_output *output, struct mref_object *mref
 
 	atomic_inc(&mars_global_io_flying);
 	atomic_inc(&output->fly_count);
-	atomic_inc(&mref->ref_count);
+	_mref_get(mref);
 
 	traced_lock(&output->lock, flags);
 	mref_a->submit_jiffies = jiffies;
diff --git a/mars_sio.c b/mars_sio.c
index 6a710356..730094b1 100644
--- a/mars_sio.c
+++ b/mars_sio.c
@@ -30,8 +30,9 @@ static int sio_ref_get(struct sio_output *output, struct mref_object *mref)
 {
 	struct file *file;
 
-	if (atomic_read(&mref->ref_count) > 0) {
-		goto done;
+	if (mref->ref_initialized) {
+		_mref_get(mref);
+		return mref->ref_len;
 	}
 
 	file = output->filp;
@@ -81,8 +82,7 @@ static int sio_ref_get(struct sio_output *output, struct mref_object *mref)
 		//atomic_inc(&output->alloc_count);
 	}
 
-done:
-	atomic_inc(&mref->ref_count);
+	_mref_get_first(mref);
 	return mref->ref_len;
 }
 
@@ -91,8 +91,7 @@ static void sio_ref_put(struct sio_output *output, struct mref_object *mref)
 	struct file *file;
 	struct sio_mref_aspect *mref_a;
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
-	if (!atomic_dec_and_test(&mref->ref_count))
+	if (!_mref_put(mref))
 		return;
 
 	file = output->filp;
@@ -320,6 +319,8 @@ static void sync_file(struct sio_output *output)
 static
 void _complete(struct sio_output *output, struct mref_object *mref, int err)
 {
+	_mref_check(mref);
+
 	mars_trace(mref, "sio_endio");
 
 	if (err < 0) {
@@ -358,6 +359,8 @@ void _sio_ref_io(struct sio_threadinfo *tinfo, struct mref_object *mref)
 	bool barrier = false;
 	int status;
 
+	_mref_check(mref);
+
 	atomic_inc(&tinfo->fly_count);
 
 	if (unlikely(!output->filp)) {
@@ -395,6 +398,8 @@ void sio_ref_io(struct sio_output *output, struct mref_object *mref)
 	struct sio_mref_aspect *mref_a;
 	unsigned long flags;
 
+	_mref_check(mref);
+
 	mref_a = sio_mref_get_aspect(output->brick, mref);
 	if (unlikely(!mref_a)) {
 		MARS_FAT("cannot get aspect\n");
@@ -403,7 +408,7 @@ void sio_ref_io(struct sio_output *output, struct mref_object *mref)
 	}
 
 	atomic_inc(&mars_global_io_flying);
-	atomic_inc(&mref->ref_count);
+	_mref_get(mref);
 
 	index = 0;
 	if (mref->ref_rw == READ) {
diff --git a/mars_trans_logger.c b/mars_trans_logger.c
index 2ea47b0d..fc158327 100644
--- a/mars_trans_logger.c
+++ b/mars_trans_logger.c
@@ -159,8 +159,7 @@ static inline
 void qq_mref_insert(struct logger_queue *q, struct trans_logger_mref_aspect *mref_a)
 {
 	struct mref_object *mref = mref_a->object;
-	CHECK_ATOMIC(&mref->ref_count, 1);
-	atomic_inc(&mref->ref_count); // must be paired with __trans_logger_ref_put()
+	_mref_get(mref); // must be paired with __trans_logger_ref_put()
 	atomic_inc(&q->q_brick->inner_balance_count);
 
 	mars_trace(mref, q->q_insert_info);
@@ -177,7 +176,7 @@ void qq_wb_insert(struct logger_queue *q, struct writeback_info *wb)
 static inline
 void qq_mref_pushback(struct logger_queue *q, struct trans_logger_mref_aspect *mref_a)
 {
-	CHECK_ATOMIC(&mref_a->object->ref_count, 1);
+	_mref_check(mref_a->object);
 
 	mars_trace(mref_a->object, q->q_pushback_info);
 
@@ -200,7 +199,7 @@ struct trans_logger_mref_aspect *qq_mref_fetch(struct logger_queue *q)
 
 	if (test) {
 		mref_a = container_of(test, struct trans_logger_mref_aspect, lh);
-		CHECK_ATOMIC(&mref_a->object->ref_count, 1);
+		_mref_check(mref_a->object);
 		mars_trace(mref_a->object, q->q_fetch_info);
 	}
 	return mref_a;
@@ -266,7 +265,7 @@ struct trans_logger_mref_aspect *_hash_find(struct list_head *start, loff_t pos,
 		}
 		test = test_a->object;
 		
-		CHECK_ATOMIC(&test->ref_count, 1);
+		_mref_check(test);
 
 		// timestamp handling
 		if (elder && timespec_compare(&test_a->stamp, elder) > 0) {
@@ -325,7 +324,7 @@ void hash_insert(struct trans_logger_brick *brick, struct trans_logger_mref_aspe
 
 #if 1
 	CHECK_HEAD_EMPTY(&elem_a->hash_head);
-	CHECK_ATOMIC(&elem_a->object->ref_count, 1);
+	_mref_check(elem_a->object);
 #endif
 
 	// only for statistics:
@@ -379,7 +378,7 @@ void hash_extend(struct trans_logger_brick *brick, loff_t *_pos, int *_len, stru
 			test_a = container_of(tmp, struct trans_logger_mref_aspect, hash_head);
 			test = test_a->object;
 			
-			CHECK_ATOMIC(&test->ref_count, 1);
+			_mref_check(test);
 
 			// timestamp handling
 			if (elder && timespec_compare(&test_a->stamp, elder) > 0) {
@@ -445,7 +444,7 @@ void hash_extend(struct trans_logger_brick *brick, loff_t *_pos, int *_len, stru
 		// collect
 		CHECK_HEAD_EMPTY(&test_a->collect_head);
 		test_a->is_collected = true;
-		CHECK_ATOMIC(&test->ref_count, 1);
+		_mref_check(test);
 		list_add_tail(&test_a->collect_head, collect_list);
 	}
 
@@ -471,7 +470,7 @@ void hash_put_all(struct trans_logger_brick *brick, struct list_head *list)
 		elem_a = container_of(tmp, struct trans_logger_mref_aspect, collect_head);
 		elem = elem_a->object;
 		CHECK_PTR(elem, err);
-		CHECK_ATOMIC(&elem->ref_count, 1);
+		_mref_check(elem);
 
 		hash = hash_fn(elem->ref_pos);
 		if (!start) {
@@ -553,13 +552,13 @@ int _make_sshadow(struct trans_logger_output *output, struct trans_logger_mref_a
 
 	/* Get an ordinary internal reference
 	 */
-	atomic_inc(&mref->ref_count); // must be paired with __trans_logger_ref_put()
+	_mref_get_first(mref); // must be paired with __trans_logger_ref_put()
 	atomic_inc(&brick->inner_balance_count);
 
 	/* Get an additional internal reference from slave to master,
 	 * such that the master cannot go away before the slave.
 	 */
-	atomic_inc(&mshadow->ref_count);  // is compensated by master transition in __trans_logger_ref_put()
+	_mref_get(mshadow);  // is compensated by master transition in __trans_logger_ref_put()
 	atomic_inc(&brick->inner_balance_count);
 
 	atomic_inc(&brick->sshadow_count);
@@ -645,14 +644,14 @@ int _write_ref_get(struct trans_logger_output *output, struct trans_logger_mref_
 	mref->ref_flags = 0;
 	mref_a->shadow_ref = mref_a; // cyclic self-reference => indicates master shadow
 
-	atomic_inc(&mref->ref_count); // must be paired with __trans_logger_ref_put()
-	atomic_inc(&brick->inner_balance_count);
-
 	atomic_inc(&brick->mshadow_count);
 	atomic_inc(&brick->total_mshadow_count);
 	atomic_inc(&global_mshadow_count);
 	atomic64_add(mref->ref_len, &global_mshadow_used);
 
+	atomic_inc(&brick->inner_balance_count);
+	_mref_get_first(mref); // must be paired with __trans_logger_ref_put()
+
 	return mref->ref_len;
 }
 
@@ -676,10 +675,10 @@ int trans_logger_ref_get(struct trans_logger_output *output, struct mref_object
 
 	atomic_inc(&brick->outer_balance_count);
 
-	if (mref_a->stamp.tv_sec) { // setup already performed
-		MARS_IO("again %d\n", atomic_read(&mref->ref_count));
-		CHECK_ATOMIC(&mref->ref_count, 1);
-		atomic_inc(&mref->ref_count); // must be paired with __trans_logger_ref_put()
+	if (mref->ref_initialized) { // setup already performed
+		MARS_IO("again %d\n", atomic_read(&mref->ref_count.ta_atomic));
+		_mref_check(mref);
+		_mref_get(mref); // must be paired with __trans_logger_ref_put()
 		return mref->ref_len;
 	}
 
@@ -727,7 +726,7 @@ restart:
 
 	MARS_IO("pos = %lld len = %d\n", mref->ref_pos, mref->ref_len);
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
+	_mref_check(mref);
 
 	// are we a shadow (whether master or slave)?
 	shadow_a = mref_a->shadow_ref;
@@ -735,9 +734,10 @@ restart:
 		bool finished;
 
 		CHECK_PTR(shadow_a, err);
-		CHECK_ATOMIC(&mref->ref_count, 1);
+		CHECK_PTR(shadow_a->object, err);
+		_mref_check(shadow_a->object);
 
-		finished = atomic_dec_and_test(&mref->ref_count);
+		finished = _mref_put(mref);
 		atomic_dec(&brick->inner_balance_count);
 		if (unlikely(finished && mref_a->is_hashed)) {
 			   MARS_ERR("trying to put a hashed mref, pos = %lld len = %d\n", mref->ref_pos, mref->ref_len);
@@ -863,7 +863,7 @@ void trans_logger_ref_io(struct trans_logger_output *output, struct mref_object
 	struct trans_logger_mref_aspect *shadow_a;
 	struct trans_logger_input *input;
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
+	_mref_check(mref);
 
 	mref_a = trans_logger_mref_get_aspect(brick, mref);
 	CHECK_PTR(mref_a, err);
@@ -886,7 +886,7 @@ void trans_logger_ref_io(struct trans_logger_output *output, struct mref_object
 		CHECK_HEAD_EMPTY(&mref_a->hash_head);
 		CHECK_HEAD_EMPTY(&mref_a->pos_head);
 #endif
-		atomic_inc(&mref->ref_count); // must be paired with __trans_logger_ref_put()
+		_mref_get(mref); // must be paired with __trans_logger_ref_put()
 		atomic_inc(&brick->inner_balance_count);
 
 		qq_mref_insert(&brick->q_phase[0], mref_a);
@@ -983,7 +983,7 @@ void free_writeback(struct writeback_info *wb)
 		orig_mref_a = container_of(tmp, struct trans_logger_mref_aspect, collect_head);
 		orig_mref = orig_mref_a->object;
 		
-		CHECK_ATOMIC(&orig_mref->ref_count, 1);
+		_mref_check(orig_mref);
 		if (unlikely(!orig_mref_a->is_collected)) {
 			MARS_ERR("request %lld (len = %d) was not collected\n", orig_mref->ref_pos, orig_mref->ref_len);
 		}
@@ -1359,15 +1359,16 @@ void phase0_preio(void *private)
 
 	orig_mref_a = private;
 	CHECK_PTR(orig_mref_a, err);
+	CHECK_PTR(orig_mref_a->object, err);
 	brick = orig_mref_a->my_brick;
 	CHECK_PTR(brick, err);
 
 	// signal completion to the upper layer
 	// FIXME: immediate error signalling is impossible here, but some delayed signalling should be possible as a workaround. Think!
-	CHECK_ATOMIC(&orig_mref_a->object->ref_count, 1);
+	_mref_check(orig_mref_a->object);
 #ifdef EARLY_COMPLETION
 	_complete(brick, orig_mref_a, 0, true);
-	CHECK_ATOMIC(&orig_mref_a->object->ref_count, 1);
+	_mref_check(orig_mref_a->object);
 #endif
 	return;
 err: 
@@ -1393,9 +1394,8 @@ void phase0_endio(void *private, int error)
 	/* Pin mref->ref_count so it can't go away
 	 * after _complete().
 	 */
-	CHECK_ATOMIC(&orig_mref->ref_count, 1);
 	_CHECK(orig_mref_a->shadow_ref, err);
-	atomic_inc(&orig_mref->ref_count); // must be paired with __trans_logger_ref_put()
+	_mref_get(orig_mref); // must be paired with __trans_logger_ref_put()
 	atomic_inc(&brick->inner_balance_count);
 
 #ifndef LATE_COMPLETE
diff --git a/mars_usebuf.c b/mars_usebuf.c
index bd357a88..707ec4ff 100644
--- a/mars_usebuf.c
+++ b/mars_usebuf.c
@@ -93,19 +93,11 @@ static void _usebuf_endio(struct generic_callback *cb)
 #endif
 	CHECKED_CALLBACK(mref, cb->cb_error, done);
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
-	if (!atomic_dec_and_test(&mref->ref_count))
+	if (!_mref_put(mref))
 		return;
 
 #if 1
-	CHECK_ATOMIC(&sub_mref->ref_count, 2);
-	atomic_dec(&sub_mref->ref_count);
-	{
-		int test = atomic_read(&sub_mref->ref_count);
-		if (test > 1) {
-			MARS_INF("ref_count = %d\n", test);
-		}
-	}
+	_mref_put(sub_mref);
 #endif
 
 	usebuf_free_mref(mref);
@@ -173,7 +165,7 @@ static int usebuf_ref_get(struct usebuf_output *output, struct mref_object *mref
 		MARS_INF("uiiiiiiiiiii\n");
 		mref->ref_data = sub_mref->ref_data;
 	}
-	atomic_inc(&mref->ref_count);
+	_mref_get(mref);
 
 	return status;
 }
@@ -203,8 +195,7 @@ static void usebuf_ref_put(struct usebuf_output *output, struct mref_object *mre
 		return;
 	}
 
-	CHECK_ATOMIC(&mref->ref_count, 1);
-	if (!atomic_dec_and_test(&mref->ref_count))
+	if (!_mref_put(mref))
 		return;
 
 	GENERIC_INPUT_CALL(input, mref_put, sub_mref);
@@ -221,6 +212,8 @@ static void usebuf_ref_io(struct usebuf_output *output, struct mref_object *mref
 
 	might_sleep();
 
+	_mref_check(mref);
+
 	mref_a = usebuf_mref_get_aspect(output->brick, mref);
 	if (unlikely(!mref_a)) {
 		MARS_FAT("cannot get aspect\n");
@@ -244,7 +237,7 @@ static void usebuf_ref_io(struct usebuf_output *output, struct mref_object *mref
 		goto err;
 	}
 
-	atomic_inc(&mref->ref_count);
+	_mref_get(mref);
 
 	sub_mref->ref_rw = mref->ref_rw;
 	sub_mref->ref_len = mref->ref_len;