mars/kernel/mars_generic.c

1435 lines
34 KiB
C

/*
* MARS Long Distance Replication Software
*
* This file is part of MARS project: http://schoebel.github.io/mars/
*
* Copyright (C) 2010-2014 Thomas Schoebel-Theuer
* Copyright (C) 2011-2014 1&1 Internet AG
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/utsname.h>
#include "mars.h"
#include "mars_client.h"
//////////////////////////////////////////////////////////////
// infrastructure
struct banning mars_global_ban = {};
EXPORT_SYMBOL_GPL(mars_global_ban);
atomic_t mars_global_io_flying = ATOMIC_INIT(0);
EXPORT_SYMBOL_GPL(mars_global_io_flying);
static char id[__NEW_UTS_LEN + 2] = {};
static int id_len = 0;
/* TODO: use MAC addresses (or motherboard IDs etc) for _validation_
* of nodenames.
* When the nodename is misconfigured, data might be scrambled.
* In ideal case, further checks should be added to prohibit accidental
* name clashes.
*/
char *my_id(void)
{
if (unlikely(!id[0])) {
struct new_utsname *u;
//down_read(&uts_sem); // FIXME: this is currently not EXPORTed from the kernel!
u = utsname();
if (u) {
strncpy(id, u->nodename, sizeof(id));
id_len = strlen(id);
}
//up_read(&uts_sem);
}
return id;
}
int my_id_len(void)
{
return id_len;
}
//////////////////////////////////////////////////////////////
// object stuff
const struct generic_object_type mref_type = {
.object_type_name = "mref",
.default_size = sizeof(struct mref_object),
.object_type_nr = OBJ_TYPE_MREF,
};
EXPORT_SYMBOL_GPL(mref_type);
//////////////////////////////////////////////////////////////
// brick stuff
/////////////////////////////////////////////////////////////////////
// meta descriptions
const struct meta mars_info_meta[] = {
META_INI(current_size, struct mars_info, FIELD_INT),
META_INI(tf_align, struct mars_info, FIELD_INT),
META_INI(tf_min_size, struct mars_info, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_info_meta);
const struct meta mars_mref_meta[] = {
META_INI(_object_cb.cb_error, struct mref_object, FIELD_INT),
META_INI(ref_pos, struct mref_object, FIELD_INT),
META_INI(ref_len, struct mref_object, FIELD_INT),
META_INI(ref_may_write, struct mref_object, FIELD_INT),
META_INI(ref_prio, struct mref_object, FIELD_INT),
META_INI(ref_cs_mode, struct mref_object, FIELD_INT),
META_INI(ref_timeout, struct mref_object, FIELD_INT),
META_INI(ref_total_size, struct mref_object, FIELD_INT),
/* QUIRK: for compatibility with the old layout, we have to
* pseudo-split the field.
* TODO: port "make data transfer independent from register size and bytesex"
* and then revert this to its old simple form.
* However, all old instances must have been updated before.
*/
{
__META_INI(ref_checksum, FIELD_RAW,
OLD_MARS_DIGEST_SIZE,
offsetof(struct mref_object, ref_checksum)),
},
{
__META_INI(ref_checksum_pseudo, FIELD_RAW,
MARS_DIGEST_SIZE - OLD_MARS_DIGEST_SIZE,
offsetof(struct mref_object, ref_checksum)
+ OLD_MARS_DIGEST_SIZE),
},
META_INI(ref_flags, struct mref_object, FIELD_UINT),
META_INI(ref_rw, struct mref_object, FIELD_INT),
META_INI(ref_id, struct mref_object, FIELD_INT),
META_INI(ref_skip_sync, struct mref_object, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_mref_meta);
const struct meta mars_lamport_time_meta[] = {
META_INI(tv_sec, struct lamport_time, FIELD_INT),
META_INI(tv_nsec, struct lamport_time, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_lamport_time_meta);
//////////////////////////////////////////////////////////////
// crypto stuff
#define MD5_DIGEST_SIZE 16
__u32 available_digest_mask = MREF_CHKSUM_MD5_OLD;
__u32 usable_digest_mask = MREF_CHKSUM_MD5_OLD;
__u32 used_log_digest = 0;
__u32 used_net_digest = 0;
#ifdef MARS_HAS_NEW_CRYPTO
/* For now, use shash.
* Later, asynchronous support should be added for full exploitation
* of _parallelizing_ (!) (so-called crypto) hardware.
*/
#include <crypto/hash.h>
/* IMPORTANT:
* Currently, we prefer CRC-like digest algorithms
* in place of "true security". Do not conclude from the infix
* CRYPTO that we would talk about real security. Instead, we are
* talking (in order) about
*
* 1) _reliability_ of data in (long-distance) _distributed_ systems
* 2) performance _penalties_
*
* Over the next years / decades, better hardware support for these goals may
* evolve. Do not blindly believe that everything called "crypto" will be
* valuable for the above goals.
*
* Here is some rough estimate about _candidates_ for the timescale
* of decades:
*
* ~/linux-next.git> grep "config CRYPTO_CRC" $(find . -name "Kconf*")
* ./crypto/Kconfig:config CRYPTO_CRC32C
* ./crypto/Kconfig:config CRYPTO_CRC32C_INTEL
* ./crypto/Kconfig:config CRYPTO_CRC32C_VPMSUM
* ./crypto/Kconfig:config CRYPTO_CRC32C_SPARC64
* ./crypto/Kconfig:config CRYPTO_CRC32
* ./crypto/Kconfig:config CRYPTO_CRC32_PCLMUL
* ./crypto/Kconfig:config CRYPTO_CRC32_MIPS
* ./crypto/Kconfig:config CRYPTO_CRCT10DIF
* ./crypto/Kconfig:config CRYPTO_CRCT10DIF_PCLMUL
* ./crypto/Kconfig:config CRYPTO_CRCT10DIF_VPMSUM
* ./arch/arm64/crypto/Kconfig:config CRYPTO_CRCT10DIF_ARM64_CE
* ./arch/arm/crypto/Kconfig:config CRYPTO_CRCT10DIF_ARM_CE
* ./arch/arm/crypto/Kconfig:config CRYPTO_CRC32_ARM_CE
* ./drivers/crypto/Kconfig:config CRYPTO_CRC32_S390
*
* Please to _not_ extend the current list of digest algorithms with TONS
* of available algorithms, because somebody just "claims" that it were
* a "good" algorithm.
*
* You need to _measure_ on more or less _generic_ SERVER hardware (not on
* Raspberry PI & co) that it actually is faster by at least 30% than the
* currently best CRC32 family.
*
* Please do not bother me with any non- _generalizable_ (!) improvements
* below 30%.
*/
static struct crypto_shash *md5_tfm = NULL;
#ifdef HAS_CRC32C
#define CRC32C_DIGEST_SIZE 4
static struct crypto_shash *crc32c_tfm = NULL;
#endif
#ifdef HAS_CRC32
#define CRC32_DIGEST_SIZE 4
static struct crypto_shash *crc32_tfm = NULL;
#endif
#ifdef HAS_SHA1
#define SHA1_DIGEST_SIZE 20
static struct crypto_shash *sha1_tfm = NULL;
#endif
struct mars_sdesc {
struct shash_desc shash;
char ctx[];
};
#define _GET_ITERATIONS(digest_size) \
(MARS_DIGEST_SIZE / (digest_size))
#define GET_ITERATIONS(digest_size) \
(!(MARS_DIGEST_SIZE % (digest_size)) ? \
_GET_ITERATIONS(digest_size) : \
_GET_ITERATIONS(digest_size) + 1)
/* Note:
* For compatibility to OLD_MARS_DIGEST_SIZE, the higher
* digest bytes up to MARS_DIGEST_SIZE are not exploited
* in this version.
*/
static
long md5_old_digest(void *digest, const void *data, int len)
{
int size = sizeof(struct mars_sdesc) + crypto_shash_descsize(md5_tfm);
struct mars_sdesc *sdesc = brick_mem_alloc(size);
long status;
sdesc->shash.tfm = md5_tfm;
#ifdef MARS_HAS_SHASH_DESC_FLAGS
sdesc->shash.flags = 0;
#endif
memset(digest, 0, MARS_DIGEST_SIZE);
status = crypto_shash_digest(&sdesc->shash, data, len, digest);
if (unlikely(status < 0)) {
MARS_ERR("cannot calculate md5 chksum on %p len=%d, status=%ld\n",
data, len,
status);
memset(digest, 0, MARS_DIGEST_SIZE);
}
brick_mem_free(sdesc);
if (status >= 0)
status = MREF_CHKSUM_MD5_OLD;
return status;
}
static
long md5_digest(void *digest, const void *data, int len)
{
int size = sizeof(struct mars_sdesc) + crypto_shash_descsize(md5_tfm);
struct mars_sdesc *sdesc = brick_mem_alloc(size);
const int iterations = GET_ITERATIONS(MD5_DIGEST_SIZE);
int chunksize = len / iterations;
int offset = 0;
int done_len = len;
int i;
long status = -EINVAL;
sdesc->shash.tfm = md5_tfm;
#ifdef MARS_HAS_SHASH_DESC_FLAGS
sdesc->shash.flags = 0;
#endif
memset(digest, 0, MARS_DIGEST_SIZE);
/* exploit the bigger MARS_DIGEST_SIZE by computing MD5 in chunks */
for (i = 0; i < iterations; i++) {
char this_digest[MD5_DIGEST_SIZE] = {};
status = crypto_shash_digest(&sdesc->shash,
data + offset,
chunksize,
this_digest);
if (unlikely(status < 0)) {
MARS_ERR("cannot calculate md5 chksum on %p len=%d, status=%ld\n",
data,
chunksize,
status);
memset(digest, 0, MARS_DIGEST_SIZE);
break;
}
memcpy(digest + i * MD5_DIGEST_SIZE,
this_digest, MD5_DIGEST_SIZE);
offset += chunksize;
done_len -= chunksize;
}
if (unlikely(done_len)) {
MARS_ERR("md5 chksum remain %d\n", done_len);
status = -EINVAL;
}
brick_mem_free(sdesc);
if (status >= 0)
status = MREF_CHKSUM_MD5;
return status;
}
#ifdef HAS_CRC32C
static
long crc32c_digest(void *digest, const void *data, int len)
{
int size = sizeof(struct mars_sdesc) + crypto_shash_descsize(crc32c_tfm);
struct mars_sdesc *sdesc = brick_mem_alloc(size);
const int iterations = GET_ITERATIONS(CRC32C_DIGEST_SIZE);
int chunksize = len / iterations;
int offset = 0;
int done_len = len;
int i;
int status;
long res = 0;
sdesc->shash.tfm = crc32c_tfm;
#ifdef MARS_HAS_SHASH_DESC_FLAGS
sdesc->shash.flags = 0;
#endif
memset(digest, 0, MARS_DIGEST_SIZE);
/* exploit the bigger MARS_DIGEST_SIZE by computing CRC32C in chunks */
for (i = 0; i < iterations; i++) {
char this_digest[CRC32C_DIGEST_SIZE] = {};
if (i == iterations - 1)
chunksize = done_len;
status = crypto_shash_digest(&sdesc->shash,
data + offset, chunksize,
this_digest);
if (unlikely(status < 0)) {
MARS_ERR("cannot calculate crc32c chksum on %p len=%d, status=%d\n",
data, chunksize,
status);
res = status;
continue;
}
memcpy(digest + i * CRC32C_DIGEST_SIZE, this_digest, CRC32C_DIGEST_SIZE);
offset += chunksize;
done_len -= chunksize;
}
if (unlikely(done_len)) {
MARS_ERR("crc32c chksum remain %d\n", done_len);
res = -EINVAL;
} else if (!res) {
res = MREF_CHKSUM_CRC32C;
}
brick_mem_free(sdesc);
return res;
}
#endif
#ifdef HAS_CRC32
static
long crc32_digest(void *digest, const void *data, int len)
{
int size = sizeof(struct mars_sdesc) + crypto_shash_descsize(crc32_tfm);
struct mars_sdesc *sdesc = brick_mem_alloc(size);
const int iterations = GET_ITERATIONS(CRC32_DIGEST_SIZE);
int chunksize = len / iterations;
int offset = 0;
int done_len = len;
int i;
int status;
long res = 0;
sdesc->shash.tfm = crc32_tfm;
#ifdef MARS_HAS_SHASH_DESC_FLAGS
sdesc->shash.flags = 0;
#endif
memset(digest, 0, MARS_DIGEST_SIZE);
/* exploit the bigger MARS_DIGEST_SIZE by computing CRC32 in chunks */
for (i = 0; i < iterations; i++) {
char this_digest[CRC32_DIGEST_SIZE] = {};
if (i == iterations - 1)
chunksize = done_len;
status = crypto_shash_digest(&sdesc->shash,
data + offset, chunksize,
this_digest);
if (unlikely(status < 0)) {
MARS_ERR("cannot calculate crc32 chksum on %p len=%d, status=%d\n",
data, chunksize,
status);
res = status;
continue;
}
memcpy(digest + i * CRC32_DIGEST_SIZE, this_digest, CRC32_DIGEST_SIZE);
offset += chunksize;
done_len -= chunksize;
}
if (!done_len) {
res = MREF_CHKSUM_CRC32;
} else if (!res) {
MARS_ERR("crc32 chksum remain %d\n", done_len);
res = -EINVAL;
}
brick_mem_free(sdesc);
return res;
}
#endif
#ifdef HAS_SHA1
static
long sha1_digest(void *digest, const void *data, int len)
{
int size = sizeof(struct mars_sdesc) + crypto_shash_descsize(sha1_tfm);
struct mars_sdesc *sdesc = brick_mem_alloc(size);
unsigned char tmp[SHA1_DIGEST_SIZE] = {};
long status;
sdesc->shash.tfm = sha1_tfm;
#ifdef MARS_HAS_SHASH_DESC_FLAGS
sdesc->shash.flags = 0;
#endif
status = crypto_shash_digest(&sdesc->shash, data, len, tmp);
if (unlikely(status < 0)) {
MARS_ERR("cannot calculate sha1 chksum on %p len=%d, status=%ld\n",
data, len,
status);
memset(digest, 0, MARS_DIGEST_SIZE);
} else {
memcpy(digest, tmp, SHA1_DIGEST_SIZE);
memset(digest + SHA1_DIGEST_SIZE, 0,
MARS_DIGEST_SIZE - SHA1_DIGEST_SIZE);
status = MREF_CHKSUM_SHA1;
}
brick_mem_free(sdesc);
return status;
}
#endif
long mars_digest(__u32 digest_flags,
__u32 *used_flags,
void *digest,
const void *data, int len)
{
long res;
bool did_retry = false;
/* The order defines the preference:
* place the most performant algorithms first.
*/
retry:
#ifdef HAS_CRC32C
if (digest_flags & MREF_CHKSUM_CRC32C && crc32c_tfm) {
res = crc32c_digest(digest, data, len);
if (res >= 0) {
if (used_flags)
*used_flags = (__u32)res;
goto done;
}
/* fallthrough to next try */
}
#endif
#ifdef HAS_CRC32
if (digest_flags & MREF_CHKSUM_CRC32 && crc32_tfm) {
res = crc32_digest(digest, data, len);
if (res >= 0) {
if (used_flags)
*used_flags = MREF_CHKSUM_CRC32;
goto done;
}
/* fallthrough to next try */
}
#endif
if (digest_flags & MREF_CHKSUM_MD5 && md5_tfm) {
res = md5_digest(digest, data, len);
if (res >= 0) {
if (used_flags)
*used_flags = MREF_CHKSUM_MD5;
goto done;
}
/* fallthrough to next try */
}
#ifdef HAS_SHA1
if (digest_flags & MREF_CHKSUM_SHA1 && sha1_tfm) {
res = sha1_digest(digest, data, len);
if (res >= 0) {
if (used_flags)
*used_flags = MREF_CHKSUM_SHA1;
goto done;
}
/* fallthrough to next try */
}
#endif
/* always fallback to old md5 regardless of digest_flags */
res = md5_old_digest(digest, data, len);
if (used_flags)
*used_flags = MREF_CHKSUM_MD5_OLD;
/* retry any error, provided the flags can be extended */
if (res < 0 && !did_retry) {
__u32 retry_flags = (usable_digest_mask & ~digest_flags);
if (!retry_flags)
goto done;
did_retry = true;
MARS_WRN("RETRY digest after error=%ld flags: 0x%x &= ~0x%x = 0x%x\n",
res,
usable_digest_mask, digest_flags, retry_flags);
digest_flags = retry_flags;
cond_resched();
goto retry;
}
done:
return res;
}
#ifdef CONFIG_MARS_BENCHMARK
static
__u32 benchmark_digest(char *name, __u32 flags)
{
unsigned char*testpage = kzalloc(PAGE_SIZE, GFP_KERNEL);
unsigned char old_test[MARS_DIGEST_SIZE] = {};
unsigned char new_test[MARS_DIGEST_SIZE];
long long delta;
long res_val;
__u32 res_flags;
__u32 test_flags = flags;
unsigned char bit;
bool report_once = false;
int i;
delta =
TIME_THIS(
for (bit = 1; bit; bit <<= 1) {
for (i = 0; i < PAGE_SIZE; i++) {
testpage[i] ^= bit;
res_val = mars_digest(flags,
NULL,
new_test,
testpage,
PAGE_SIZE);
res_flags = (__u32)res_val;
if (unlikely(res_val < 0 || !(res_flags & flags))) {
MARS_ERR("digest %s failed code=%ld\n",
name, res_val);
res_flags &= flags;
goto err;
}
if (unlikely(!report_once &&
res_flags & ~test_flags)) {
report_once = true;
MARS_INF("digest %s was superseded 0x%x => 0x%x\n",
name, test_flags, res_flags);
test_flags |= res_flags;
}
if (unlikely(!memcmp(old_test, new_test, MARS_DIGEST_SIZE))) {
MARS_ERR("digest %s is not good enough, flags=0x%x\n",
name, res_flags);
goto err;
}
test_flags &= res_flags;
memcpy(old_test, new_test, MARS_DIGEST_SIZE);
}
}
);
printk(KERN_INFO "%-10s digest duration = %12lld ns\n",
name, delta);
res_flags |= test_flags;
err:
kfree(testpage);
res_flags |= MREF_CHKSUM_MD5_OLD;
cond_resched();
return res_flags;
}
#endif
static
int init_mars_digest(void)
{
__u32 checked_digests;
int status;
md5_tfm = crypto_alloc_shash("md5", 0, 0);
if (unlikely(!md5_tfm) || IS_ERR(md5_tfm)) {
MARS_ERR("cannot alloc crypto hash, status=%ld\n",
PTR_ERR(md5_tfm));
md5_tfm = NULL;
return -ELIBACC;
}
status = crypto_shash_digestsize(md5_tfm);
if (unlikely(status != MD5_DIGEST_SIZE)) {
MARS_ERR("md5 bad digest size %d\n", status);
return -ELIBACC;
}
available_digest_mask |= MREF_CHKSUM_MD5;
#ifdef HAS_CRC32C
crc32c_tfm = crypto_alloc_shash("crc32c", 0, 0);
if (unlikely(!crc32c_tfm) || IS_ERR(crc32c_tfm)) {
MARS_ERR("cannot alloc crc32c crypto hash, status=%ld\n",
PTR_ERR(crc32c_tfm));
crc32c_tfm = NULL;
} else {
status = crypto_shash_digestsize(crc32c_tfm);
if (unlikely(status != CRC32C_DIGEST_SIZE)) {
MARS_ERR("crc32c bad digest size %d\n", status);
return -ELIBACC;
}
available_digest_mask |= MREF_CHKSUM_CRC32C;
}
#endif
#ifdef HAS_CRC32
crc32_tfm = crypto_alloc_shash("crc32", 0, 0);
if (unlikely(!crc32_tfm) || IS_ERR(crc32_tfm)) {
MARS_ERR("cannot alloc crc32 crypto hash, status=%ld\n",
PTR_ERR(crc32_tfm));
crc32_tfm = NULL;
} else {
status = crypto_shash_digestsize(crc32_tfm);
if (unlikely(status != CRC32_DIGEST_SIZE)) {
MARS_ERR("crc32 bad digest size %d\n", status);
return -ELIBACC;
}
available_digest_mask |= MREF_CHKSUM_CRC32;
}
#endif
#ifdef HAS_SHA1
sha1_tfm = crypto_alloc_shash("sha1", 0, 0);
if (unlikely(!sha1_tfm) || IS_ERR(sha1_tfm)) {
MARS_ERR("cannot alloc crypto hash, status=%ld\n",
PTR_ERR(sha1_tfm));
sha1_tfm = NULL;
} else {
status = crypto_shash_digestsize(sha1_tfm);
if (unlikely(status != SHA1_DIGEST_SIZE)) {
MARS_ERR("sha1 bad digest size %d\n", status);
return -ELIBACC;
}
available_digest_mask |= MREF_CHKSUM_SHA1;
}
#endif
checked_digests = MREF_CHKSUM_MD5_OLD;
#ifdef CONFIG_MARS_BENCHMARK
/* Side effect of benchmarks:
* Check that configured digests are actually working.
*/
#ifdef HAS_CRC32C
if (crc32c_tfm)
checked_digests |=
benchmark_digest("crc32c", MREF_CHKSUM_CRC32C);
#endif
#ifdef HAS_CRC32
if (crc32_tfm)
checked_digests |=
benchmark_digest("crc32", MREF_CHKSUM_CRC32);
#endif
#ifdef HAS_SHA1
if (sha1_tfm)
checked_digests |=
benchmark_digest("sha1", MREF_CHKSUM_SHA1);
#endif
checked_digests |=
benchmark_digest("md5old", MREF_CHKSUM_MD5_OLD);
if (md5_tfm)
checked_digests |=
benchmark_digest("md5", MREF_CHKSUM_MD5);
#else
/* Without any benchmark results, we need
* to enable all _initialized_ digests for safety.
* If they don't actually work for whatever reason,
* runtime spits may occur ;)
*/
#ifdef HAS_CRC32C
if (crc32c_tfm)
checked_digests |= MREF_CHKSUM_CRC32C;
#endif
#ifdef HAS_CRC32
if (crc32_tfm)
checked_digests |= MREF_CHKSUM_CRC32;
#endif
#ifdef HAS_SHA1
if (sha1_tfm)
checked_digests |= MREF_CHKSUM_SHA1;
#endif
if (md5_tfm)
checked_digests |= MREF_CHKSUM_MD5;
#endif
usable_digest_mask = checked_digests;
return 0;
}
static
void exit_mars_digest(void)
{
if (md5_tfm) {
crypto_free_shash(md5_tfm);
}
#ifdef HAS_CRC32C
if (crc32c_tfm) {
crypto_free_shash(crc32c_tfm);
}
#endif
#ifdef HAS_CRC32
if (crc32_tfm) {
crypto_free_shash(crc32_tfm);
}
#endif
#ifdef HAS_SHA1
if (sha1_tfm) {
crypto_free_shash(sha1_tfm);
}
#endif
}
#else /* MARS_HAS_NEW_CRYPTO */
/* Old implementation, to disappear.
* Was a quick'n dirty lab prototype with unnecessary
* global variables and locking.
*/
#define OBSOLETE_TFM_MAX 128
static struct crypto_hash *mars_tfm[OBSOLETE_TFM_MAX];
static struct semaphore tfm_sem[OBSOLETE_TFM_MAX];
long mars_digest(__u32 digest_flags,
__u32 *used_flags,
void *digest,
void *data, int len)
{
static unsigned int round_robin = 0;
unsigned int i = round_robin++ % OBSOLETE_TFM_MAX;
struct hash_desc desc = {
.tfm = mars_tfm[i],
#ifdef MARS_HAS_SHASH_DESC_FLAGS
.flags = 0,
#endif
};
struct scatterlist sg;
memset(digest, 0, MARS_DIGEST_SIZE);
down(&tfm_sem[i]);
crypto_hash_init(&desc);
sg_init_table(&sg, 1);
sg_set_buf(&sg, data, len);
crypto_hash_update(&desc, &sg, sg.length);
crypto_hash_final(&desc, digest);
up(&tfm_sem[i]);
if (used_flags)
*used_flags = MREF_CHKSUM_MD5_OLD;
return MREF_CHKSUM_MD5_OLD;
}
#endif /* MARS_HAS_NEW_CRYPTO */
void mref_checksum(struct mref_object *mref)
{
unsigned char checksum[MARS_DIGEST_SIZE];
__u32 digest_flags;
int len;
digest_flags = mref->ref_flags & MREF_CHKSUM_ANY;
if (!digest_flags || !mref->ref_data)
return;
digest_flags =
mars_digest(digest_flags,
&used_net_digest,
checksum,
mref->ref_data, mref->ref_len);
mref->ref_flags = (mref->ref_flags & ~MREF_CHKSUM_ANY) | digest_flags;
len = sizeof(mref->ref_checksum);
if (len > MARS_DIGEST_SIZE)
len = MARS_DIGEST_SIZE;
memcpy(&mref->ref_checksum, checksum, len);
}
/*******************************************************************/
/* compression */
int compress_overhead = 0;
__u32 available_compression_mask =
#ifdef HAS_LZO
MREF_COMPRESS_LZO |
#endif
#ifdef HAS_LZ4
MREF_COMPRESS_LZ4 |
#endif
#ifdef HAS_ZLIB
MREF_COMPRESS_ZLIB |
#endif
0;
__u32 usable_compression_mask = 0;
__u32 used_compression = 0;
int mars_zlib_compression_level = 3;
int mars_compress(void *src_data,
int src_len,
void *dst_data,
int dst_len,
__u32 check_flags,
__u32 *result_flags)
{
void *tmp_buf = dst_data;
int res = 0;
check_flags &= usable_compression_mask;
if (!(check_flags & MREF_COMPRESS_ANY)) {
used_compression = 0;
return 0;
}
if (unlikely(src_len > MARS_MAX_COMPR_SIZE)) {
MARS_ERR("tryping to compress %d, more than %ld bytes\n",
src_len, MARS_MAX_COMPR_SIZE);
goto done;
}
/* The order determines the preferences */
#ifdef HAS_LZO
if (check_flags & MREF_COMPRESS_LZO) {
int max_len = lzo1x_worst_compress(src_len);
void *wrkmem;
size_t res_len = 0;
int status;
if (!dst_data) {
tmp_buf = brick_mem_alloc(max_len);
} else if (dst_len < max_len) {
return -ENOSPC;
}
wrkmem = brick_mem_alloc(LZO1X_1_MEM_COMPRESS);
status = lzo1x_1_compress(src_data, src_len,
tmp_buf, &res_len, wrkmem);
/* ensure that the result is really smaller */
if (status == LZO_E_OK &&
res_len > 0 &&
res_len <= dst_len) {
used_compression = MREF_COMPRESS_LZO;
*result_flags |= MREF_COMPRESS_LZO;
res = res_len;
/*
* TODO: avoid memcpy() by swizzling the src_data pointer
*/
if (!dst_data)
memcpy(src_data, tmp_buf, res_len);
}
brick_mem_free(wrkmem);
/* do not try other compression methods */
goto done;
}
#endif
#ifdef HAS_LZ4
if (check_flags & MREF_COMPRESS_LZ4) {
#ifdef HAS_FAST_LZ4
size_t max_len = LZ4_COMPRESSBOUND(src_len);
#else
size_t max_len = lz4_compressbound(src_len);
#endif
size_t res_len = 0;
void *wrkmem;
int status;
if (!dst_data) {
tmp_buf = brick_mem_alloc(max_len);
} else if (dst_len < max_len) {
return -ENOSPC;
}
wrkmem = brick_block_alloc(0, LZ4_MEM_COMPRESS);
#ifdef HAS_FAST_LZ4
res_len = LZ4_compress_fast(src_data,
tmp_buf,
src_len,
max_len,
LZ4_ACCELERATION_DEFAULT,
wrkmem);
status = 0;
#else
status = lz4_compress(src_data, src_len,
tmp_buf, &res_len,
wrkmem);
#endif
if (likely(!status &&
res_len > 0 &&
res_len <= dst_len)) {
used_compression = MREF_COMPRESS_LZ4;
*result_flags |= MREF_COMPRESS_LZ4;
res = res_len;
/*
* TODO: avoid memcpy() by swizzling the src_data pointer
*/
if (!dst_data)
memcpy(src_data, tmp_buf, res_len);
}
brick_block_free(wrkmem, LZ4_MEM_COMPRESS);
/* do not try other compression methods */
goto done;
}
#endif
#ifdef HAS_ZLIB
if (check_flags & MREF_COMPRESS_ZLIB) {
size_t zlib_deflate_wrk_size = zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL);
struct z_stream_s stream = {
.workspace = brick_mem_alloc(zlib_deflate_wrk_size),
};
int status;
if (!dst_data) {
tmp_buf = brick_mem_alloc(src_len);
} else if (dst_len < src_len) {
return -ENOSPC;
}
status = zlib_deflateInit(&stream, mars_zlib_compression_level);
if (unlikely(status != Z_OK)) {
MARS_ERR("cannot init zlib compression stream\n");
goto zlib_err;
}
stream.next_in = src_data;
stream.avail_in = src_len;
stream.next_out = tmp_buf;
stream.avail_out = src_len;
status = zlib_deflate(&stream, Z_FINISH);
if (status != Z_STREAM_END)
goto zlib_err;
status = zlib_deflateEnd(&stream);
if (status == Z_OK &&
stream.total_out <= dst_len) {
used_compression = MREF_COMPRESS_ZLIB;
*result_flags |= MREF_COMPRESS_ZLIB;
res = stream.total_out;
/*
* TODO: avoid memcpy() by swizzling the src_data pointer
*/
if (!dst_data)
memcpy(src_data, tmp_buf, stream.total_out);
}
zlib_err:
brick_mem_free(stream.workspace);
/* do not try other compression methods */
goto done;
}
#endif
used_compression = 0;
done:
if (!dst_data)
brick_mem_free(tmp_buf);
return res;
}
void *mars_decompress(void *src_data,
int src_len,
void *dst_data,
int dst_len,
__u32 check_flags)
{
void *res_buf = dst_data;
if (!res_buf)
res_buf = brick_mem_alloc(dst_len);
#ifdef HAS_LZO
if (check_flags & MREF_COMPRESS_LZO) {
size_t res_len = dst_len;
int status;
status = lzo1x_decompress_safe(src_data, src_len,
res_buf, &res_len);
if (status == LZO_E_OK && dst_len == res_len)
goto done;
MARS_ERR("bad LZO decompression from %d to %ld bytes (requested %d)\n",
src_len, res_len, dst_len);
goto err;
}
#endif
#ifdef HAS_LZ4
if (check_flags & MREF_COMPRESS_LZ4) {
size_t new_len = src_len;
int status = 0;
#ifdef HAS_FAST_LZ4
new_len = LZ4_decompress_safe(src_data,
res_buf,
src_len,
dst_len);
if (!status && new_len == dst_len)
goto done;
MARS_ERR("bad LZ4 decompression %d to %lu != %d bytes\n",
src_len, new_len, dst_len);
#else
status = lz4_decompress(src_data, &new_len,
res_buf, dst_len);
if (!status && new_len == src_len)
goto done;
MARS_ERR("bad LZ4 decompression %d != %lu to %d bytes\n",
src_len, new_len, dst_len);
#endif
goto err;
}
#endif
#ifdef HAS_ZLIB
if (check_flags & MREF_COMPRESS_ZLIB) {
size_t zlib_inflate_wrk_size = zlib_inflate_workspacesize();
struct z_stream_s stream = {
.workspace = brick_mem_alloc(zlib_inflate_wrk_size),
};
int status;
status = zlib_inflateInit(&stream);
if (unlikely(status != Z_OK)) {
MARS_ERR("cannot init zlib decompression stream\n");
goto zlib_err;
}
stream.next_in = src_data;
stream.avail_in = src_len;
stream.next_out = res_buf;
stream.avail_out = dst_len;
status = zlib_inflate(&stream, Z_FINISH);
if (unlikely(status != Z_STREAM_END)) {
MARS_ERR("bad ZLIB decompression %d (requested %d)\n",
src_len, dst_len);
goto zlib_err;
}
status = zlib_inflateEnd(&stream);
if (likely(status == Z_OK)) {
brick_mem_free(stream.workspace);
goto done;
}
MARS_ERR("unfinished ZLIB decompression %d (requested %d)\n",
src_len, dst_len);
zlib_err:
brick_mem_free(stream.workspace);
goto err;
}
#endif
MARS_ERR("decompression not compiled into kernel module\n");
err:
if (!dst_data)
brick_mem_free(res_buf);
res_buf = NULL;
done:
return res_buf;
}
#ifdef CONFIG_MARS_BENCHMARK
#define MARS_CLEAN_SIZE 256
static
void make_fake_page(__u32 *testpage)
{
int i;
/* some fake compression data */
for (i = 0; i < PAGE_SIZE / sizeof(__u32); i++)
testpage[i] = (__u32)i;
}
static
void benchmark_compress(char *name, __u32 flags)
{
void *testpage = kmalloc(PAGE_SIZE, GFP_KERNEL);
__u32 result_flags;
long long delta;
int status;
int i;
usable_compression_mask = MREF_COMPRESS_ANY;
make_fake_page(testpage);
delta = TIME_THIS(
for (i = 0; i < 10000; i++) {
memset(testpage, 0, MARS_CLEAN_SIZE);
result_flags = 0;
status =
mars_compress(testpage, PAGE_SIZE,
NULL, PAGE_SIZE + compress_overhead,
flags, &result_flags);
if (unlikely(status <= 0) || !(flags & result_flags)) {
MARS_ERR("%s compress failure, status=%d, flags=%x\n",
name, status, result_flags);
goto err;
}
}
);
printk(KERN_INFO "%-8s compress duration = %12lld ns\n", name, delta);
err:
kfree(testpage);
usable_compression_mask = 0;
}
#endif
static
int init_mars_compress(void)
{
int max_len = 0;
#ifdef HAS_LZO
max_len = lzo1x_worst_compress(MARS_MAX_COMPR_SIZE) - MARS_MAX_COMPR_SIZE;
if (max_len > compress_overhead)
compress_overhead = max_len;
#endif
#ifdef HAS_LZ4
#ifdef HAS_FAST_LZ4
max_len = LZ4_COMPRESSBOUND(MARS_MAX_COMPR_SIZE) - MARS_MAX_COMPR_SIZE;
#else
max_len = lz4_compressbound(MARS_MAX_COMPR_SIZE) - MARS_MAX_COMPR_SIZE;
#endif
if (max_len > compress_overhead)
compress_overhead = max_len;
#endif
#ifdef CONFIG_MARS_BENCHMARK
#ifdef HAS_LZO
benchmark_compress("lzo", MREF_COMPRESS_LZO);
#endif
#ifdef HAS_LZ4
benchmark_compress("lz4", MREF_COMPRESS_LZ4);
#endif
#ifdef HAS_ZLIB
benchmark_compress("zlib", MREF_COMPRESS_ZLIB);
#endif
(void)benchmark_compress;
#endif
return 0;
}
static
void exit_mars_compress(void)
{
}
/////////////////////////////////////////////////////////////////////
// tracing
#ifdef MARS_TRACING
unsigned long long start_trace_clock = 0;
EXPORT_SYMBOL_GPL(start_trace_clock);
struct file *mars_log_file = NULL;
loff_t mars_log_pos = 0;
void _mars_log(char *buf, int len)
{
static DEFINE_MUTEX(trace_lock);
#ifdef MARS_HAS_KERNEL_READ
mutex_lock(&trace_lock);
(void)kernel_write(mars_log_file,
buf,
len,
&mars_log_pos);
mutex_unlock(&trace_lock);
#else
mm_segment_t oldfs;
oldfs = get_fs();
set_fs(KERNEL_DS);
mutex_lock(&trace_lock);
vfs_write(mars_log_file, buf, len, &mars_log_pos);
mutex_unlock(&trace_lock);
set_fs(oldfs);
#endif
}
EXPORT_SYMBOL_GPL(_mars_log);
void mars_log(const char *fmt, ...)
{
char *buf = brick_string_alloc(0);
va_list args;
int len;
if (!buf)
return;
va_start(args, fmt);
len = vscnprintf(buf, PAGE_SIZE, fmt, args);
va_end(args);
_mars_log(buf, len);
brick_string_free(buf);
}
EXPORT_SYMBOL_GPL(mars_log);
void mars_trace(struct mref_object *mref, const char *info)
{
int index = mref->ref_traces;
if (likely(index < MAX_TRACES)) {
mref->ref_trace_stamp[index] = cpu_clock(raw_smp_processor_id());
mref->ref_trace_info[index] = info;
mref->ref_traces++;
}
}
EXPORT_SYMBOL_GPL(mars_trace);
void mars_log_trace(struct mref_object *mref)
{
char *buf = brick_string_alloc(0);
unsigned long long old;
unsigned long long diff;
int i;
int len;
if (!buf) {
return;
}
if (!mars_log_file || !mref->ref_traces) {
goto done;
}
if (!start_trace_clock) {
start_trace_clock = mref->ref_trace_stamp[0];
}
diff = mref->ref_trace_stamp[mref->ref_traces-1] - mref->ref_trace_stamp[0];
len = scnprintf(buf, PAGE_SIZE, "%c ;%12lld ;%6d;%10llu",
mref->ref_flags & MREF_WRITE ? 'W' : 'R',
mref->ref_pos, mref->ref_len, diff / 1000);
old = start_trace_clock;
for (i = 0; i < mref->ref_traces; i++) {
diff = mref->ref_trace_stamp[i] - old;
len += scnprintf(buf + len, PAGE_SIZE - len, " ; %s ;%10llu", mref->ref_trace_info[i], diff / 1000);
old = mref->ref_trace_stamp[i];
}
len +=scnprintf(buf + len, PAGE_SIZE - len, "\n");
_mars_log(buf, len);
done:
brick_string_free(buf);
mref->ref_traces = 0;
}
EXPORT_SYMBOL_GPL(mars_log_trace);
#endif // MARS_TRACING
/////////////////////////////////////////////////////////////////////
// power led handling
void mars_power_led_on(struct mars_brick *brick, bool val)
{
bool oldval = brick->power.led_on;
if (val != oldval) {
//MARS_DBG("brick '%s' type '%s' led_on %d -> %d\n", brick->brick_path, brick->type->type_name, oldval, val);
set_led_on(&brick->power, val);
mars_trigger();
}
}
EXPORT_SYMBOL_GPL(mars_power_led_on);
void mars_power_led_off(struct mars_brick *brick, bool val)
{
bool oldval = brick->power.led_off;
if (val != oldval) {
//MARS_DBG("brick '%s' type '%s' led_off %d -> %d\n", brick->brick_path, brick->type->type_name, oldval, val);
set_led_off(&brick->power, val);
mars_trigger();
}
}
EXPORT_SYMBOL_GPL(mars_power_led_off);
/////////////////////////////////////////////////////////////////////
// init stuff
struct mm_struct *mm_fake = NULL;
EXPORT_SYMBOL_GPL(mm_fake);
struct task_struct *mm_fake_task = NULL;
atomic_t mm_fake_count = ATOMIC_INIT(0);
EXPORT_SYMBOL_GPL(mm_fake_count);
int __init init_mars(void)
{
int status;
MARS_INF("init_mars()\n");
set_fake();
#ifdef MARS_TRACING
{
int flags = O_CREAT | O_TRUNC | O_RDWR | O_LARGEFILE;
int prot = 0600;
mm_segment_t oldfs;
oldfs = get_fs();
set_fs(KERNEL_DS);
mars_log_file = filp_open("/mars/trace.csv", flags, prot);
set_fs(oldfs);
if (IS_ERR(mars_log_file)) {
MARS_ERR("cannot create trace logfile, status = %ld\n", PTR_ERR(mars_log_file));
mars_log_file = NULL;
}
}
#endif
#ifdef MARS_HAS_NEW_CRYPTO
status = init_mars_digest();
if (unlikely(status))
return status;
#else /* MARS_HAS_NEW_CRYPTO */
{
int i;
for (i = 0; i < OBSOLETE_TFM_MAX; i++) {
sema_init(&tfm_sem[i], 1);
mars_tfm[i] = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
if (!mars_tfm[i]) {
MARS_ERR("cannot alloc crypto hash\n");
return -ENOMEM;
}
if (IS_ERR(mars_tfm)) {
MARS_ERR("alloc crypto hash failed, status = %d\n", (int)PTR_ERR(mars_tfm));
return PTR_ERR(mars_tfm);
}
}
}
#if 0
if (crypto_tfm_alg_type(crypto_hash_tfm(mars_tfm)) != CRYPTO_ALG_TYPE_DIGEST) {
MARS_ERR("bad crypto hash type\n");
return -EINVAL;
}
#endif
status = crypto_hash_digestsize(mars_tfm[0]);
MARS_INF("digest_size = %d\n", status);
if (unlikely(status != MARS_DIGEST_SIZE)) {
MARS_ERR("bad md5 crypto hash size %d\n", status);
return -EINVAL;
}
#endif /* MARS_HAS_NEW_CRYPTO */
init_mars_compress();
return 0;
}
void exit_mars(void)
{
MARS_INF("exit_mars()\n");
put_fake();
exit_mars_compress();
#ifdef MARS_HAS_NEW_CRYPTO
exit_mars_digest();
#else /* MARS_HAS_NEW_CRYPTO */
if (mars_tfm[0]) {
int i;
for (i = 0; i < OBSOLETE_TFM_MAX; i++)
crypto_free_hash(mars_tfm[i]);
}
#endif /* MARS_HAS_NEW_CRYPTO */
#ifdef MARS_TRACING
if (mars_log_file) {
filp_close(mars_log_file, NULL);
mars_log_file = NULL;
}
#endif
}