1
0
mirror of https://github.com/ceph/ceph synced 2025-03-25 11:48:05 +00:00
ceph/src/compressor/brotli/BrotliCompressor.cc
Jiang Yutang cc59da9785 common/buffer.cc: add create_small_page_aligned to avoid mem waste when apply for small mem in big page size(e.g. 64k) OS
On my arm64 dev board (CentOS 7.4, one SSD disk, ceph version 13.2.1) the
default OS page size is 64k. When I run a fio randread test with bs=4k, the
ceph-osd process uses a large amount of memory (more than 20G), while with
bs=64k it uses just over 2G. After tracing the memory allocation path, this was
found to be related to page size alignment: requesting a small buffer (4k) but
aligning it to the big page size (64k) wastes memory.

With reference to the original create_page_aligned, add a new interface,
create_small_page_aligned, that uses 4k alignment. Go through all the callers of
create_page_aligned and split them into big and small page alignment according
to the relationship between the requested size and the current page size.
Individual callers that have their own context logic are not diverted.

After applying the patch and running the fio randread test (bs=4k) on a 64k
page size OS, the memory used by the ceph-osd process is reduced from more than
20G to about 3G; for the bs=16k case the memory used is also significantly
reduced, while the read performance has not been reduced.

When porting the patch to the latest ceph tree (version 14.0.0-xxx), I also ran
a comparative verification. For the fio (bs=4k) test, although the current
14.0.0-x version uses less memory than the 13.2.1 version, the memory usage
with the patch is still reduced significantly.

The following is a partial comparison of validation data, different software and
hardware environments may have different test values, the better the performance
of the SSD, the more memory it will use.

ceph version	bs	VIRT	RES
13.2.1		64k	3600896	2.7g
13.2.1		64k	3610112	2.7g
13.2.1		64k	3614208	2.7g
13.2.1		16k	7485184	6.4g
13.2.1		16k	7486208	6.4g
13.2.1		16k	7486208	6.4g
13.2.1		4k	23.7g	22.9g	<--A lot of waste
13.2.1		4k	23.7g	22.9g
13.2.1		4k	23.7g	22.9g
13.2.1+patch	64k	3632384	2.7g
13.2.1+patch	64k	3636480	2.7g
13.2.1+patch	64k	3640576	2.7g
13.2.1+patch	16k	3175296	2.2g
13.2.1+patch	16k	3175296	2.2g
13.2.1+patch	16k	3176320	2.2g
13.2.1+patch	4k	4265920	3.3g	<--Reasonable usage quantity
13.2.1+patch	4k	4265920	3.3g
13.2.1+patch	4k	4265920	3.3g

14.0.0-x	64k	6230784	4.4g
14.0.0-x	64k	5731840	4.1g
14.0.0-x	64k	4547072	3.5g
14.0.0-x	64k	4544000	3.6g
14.0.0-x	16k	6272192	5.2g
14.0.0-x	16k	6343168	5.3g
14.0.0-x	16k	6357696	5.3g
14.0.0-x	4k	10.1g	9.3g	<--A lot of waste
14.0.0-x	4k	10.3g	9.6g
14.0.0-x	4k	10.3g	9.4g
14.0.0-x+patch	64k	5974464	4.6g
14.0.0-x+patch	64k	4547008	3.5g
14.0.0-x+patch	64k	4556288	3.6g
14.0.0-x+patch	16k	4058560	3.1g
14.0.0-x+patch	16k	4053504	3.1g
14.0.0-x+patch	16k	4062720	3.1g
14.0.0-x+patch	4k	5283264	4.3g	<--Reasonable usage quantity
14.0.0-x+patch	4k	5324224	4.3g
14.0.0-x+patch	4k	5297600	4.3g

Signed-off-by: Jiang Yutang <yutang2.jiang@hxt-semitech.com>
2018-09-07 11:19:45 +08:00

96 lines
3.5 KiB
C++

#include "brotli/encode.h"
#include "brotli/decode.h"
#include "BrotliCompressor.h"
#include "include/scope_guard.h"
// Size in bytes of each output chunk allocated while decompressing;
// defined as CEPH_PAGE_SIZE.
#define MAX_LEN (CEPH_PAGE_SIZE)
int BrotliCompressor::compress(const bufferlist &in, bufferlist &out)
{
BrotliEncoderState* s = BrotliEncoderCreateInstance(nullptr,
nullptr,
nullptr);
if (!s) {
return -1;
}
auto sg = make_scope_guard([&s] { BrotliEncoderDestroyInstance(s); });
BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)9);
BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, 22);
for (auto i = in.buffers().begin(); i != in.buffers().end();) {
size_t available_in = i->length();
size_t max_comp_size = BrotliEncoderMaxCompressedSize(available_in);
size_t available_out = max_comp_size;
bufferptr ptr = buffer::create_small_page_aligned(max_comp_size);
uint8_t* next_out = (uint8_t*)ptr.c_str();
const uint8_t* next_in = (uint8_t*)i->c_str();
++i;
BrotliEncoderOperation finish = i != in.buffers().end() ?
BROTLI_OPERATION_PROCESS :
BROTLI_OPERATION_FINISH;
do {
if (!BrotliEncoderCompressStream(s,
finish,
&available_in,
&next_in,
&available_out,
&next_out,
nullptr)) {
return -1;
}
unsigned have = max_comp_size - available_out;
out.append(ptr, 0, have);
} while (available_out == 0);
if (BrotliEncoderIsFinished(s)) {
break;
}
}
return 0;
}
/**
 * Decompress a brotli stream read through @p p and append the result to
 * @p out.
 *
 * At most min(p.get_remaining(), compressed_size) bytes are read; @p p is
 * advanced past the bytes that were fetched. Output is produced in chunks of
 * MAX_LEN bytes. Decoding stops as soon as the decoder reports the stream as
 * finished, even if input bytes remain.
 *
 * @param p                iterator positioned at the start of the stream
 * @param compressed_size  upper bound on the number of bytes to read
 * @param out              receives the decompressed bytes (appended)
 * @return 0 on success (also when there is no input at all); -1 if the
 *         decoder cannot be created, reports an error, or the input ends
 *         before the stream is complete
 */
int BrotliCompressor::decompress(bufferlist::const_iterator &p,
                                 size_t compressed_size,
                                 bufferlist &out)
{
  BrotliDecoderState* s = BrotliDecoderCreateInstance(nullptr,
                                                      nullptr,
                                                      nullptr);
  if (!s) {
    return -1;
  }
  // Destroy the decoder on every exit path.
  auto sg = make_scope_guard([&s] { BrotliDecoderDestroyInstance(s); });
  size_t remaining = std::min<size_t>(p.get_remaining(), compressed_size);
  if (remaining == 0) {
    // Nothing to decode: succeed with no output, as before.
    return 0;
  }
  while (remaining) {
    const uint8_t* next_in;
    // Fetch the next contiguous run of input bytes (may be shorter than
    // `remaining`).
    size_t len = p.get_ptr_and_advance(remaining, (const char**)&next_in);
    remaining -= len;
    size_t available_in = len;
    BrotliDecoderResult result = BROTLI_DECODER_RESULT_ERROR;
    do {
      // A fresh chunk per call: chunks already appended to `out` are shared
      // with it and must not be overwritten.
      size_t available_out = MAX_LEN;
      bufferptr ptr = buffer::create_page_aligned(MAX_LEN);
      uint8_t* next_out = (uint8_t*)ptr.c_str();
      result = BrotliDecoderDecompressStream(s,
                                             &available_in,
                                             &next_in,
                                             &available_out,
                                             &next_out,
                                             nullptr);
      if (result == BROTLI_DECODER_RESULT_ERROR) {
        return -1;
      }
      unsigned have = MAX_LEN - available_out;
      out.append(ptr, 0, have);
    } while (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
    if (BrotliDecoderIsFinished(s)) {
      break;
    }
  }
  // Running out of input before the decoder is finished means the stream was
  // truncated; do not report partial output as success.
  return BrotliDecoderIsFinished(s) ? 0 : -1;
}
/**
 * Decompress the whole of @p in and append the result to @p out.
 *
 * Convenience overload that forwards to the iterator-based decompress() with
 * an iterator at the start of @p in and in.length() as the compressed size.
 *
 * @param in   compressed data
 * @param out  receives the decompressed bytes
 * @return whatever the iterator-based decompress() returns
 */
int BrotliCompressor::decompress(const bufferlist &in, bufferlist &out)
{
  // Iterate the const list directly; no const_cast is needed because the
  // callee takes a const_iterator.
  bufferlist::const_iterator i = in.begin();
  return decompress(i, in.length(), out);
}