/* Source listing: mirror of https://github.com/schoebel/mars (1079 lines, 29 KiB, C) */
/*
 * MARS Long Distance Replication Software
 *
 * This file is part of MARS project: http://schoebel.github.io/mars/
 *
 * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
 * Copyright (C) 2011-2014 1&1 Internet AG
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/delay.h>
|
|
|
|
#include <asm/atomic.h>
|
|
|
|
#include "brick_mem.h"
|
|
#include "brick_say.h"
|
|
#include "brick_locks.h"
|
|
#include "lamport.h"
|
|
#include "buildtag.h"
|
|
|
|
/* Take page-granular blocks from the kernel page allocator.
 * Currently mandatory (vmalloc does not work).
 */
#define USE_KERNEL_PAGES

/* Guard words written around allocations when memory debugging is
 * compiled in. They are compared again at free time; a mismatch is
 * reported as memory corruption.
 */
#define MAGIC_BLOCK  (int)0x8B395D7B	/* start of a page block */
#define MAGIC_BEND   (int)0x8B395D7C	/* end of a page block */
#define MAGIC_MEM1   (int)0x8B395D7D	/* small memory: first header word */
#define MAGIC_MEM2   (int)0x9B395D8D	/* small memory: last header word */
#define MAGIC_MEND1  (int)0x8B395D7E	/* small memory: first trailer word */
#define MAGIC_MEND2  (int)0x9B395D8E	/* small memory: second trailer word */
#define MAGIC_STR    (int)0x8B395D7F	/* start of a string */
#define MAGIC_SEND   (int)0x9B395D8F	/* end of a string */

/* Lvalue access to the int located 'offset' bytes behind 'ptr'. */
#define INT_ACCESS(ptr,offset) (*(int*)(((char*)(ptr)) + (offset)))
|
|
|
|
/* This part is historic and is meant to disappear in the long term.
 * When CONFIG_MARS_DEBUG_DEVEL_VIA_SAY is unset, all message macros
 * defined here expand to nothing.
 */
#ifdef CONFIG_MARS_DEBUG_DEVEL_VIA_SAY

/* Expands to the complete format string, followed by the arguments
 * for the common message prefix (both timestamps, message class,
 * task name, CPU, file, line and function).
 * Must only be used inside _BRICK_MSG(), which declares _s_now / _l_now.
 */
#define _BRICK_FMT(_fmt,_class)						\
	"%lld.%09ld %lld.%09ld MEM_%-5s %s[%d] %s:%d %s(): "		\
	_fmt,								\
	(s64)_s_now.tv_sec, _s_now.tv_nsec,				\
	(s64)_l_now.tv_sec, _l_now.tv_nsec,				\
	say_class[_class],						\
	current->comm, (int)smp_processor_id(),				\
	__BASE_FILE__,							\
	__LINE__,							\
	__FUNCTION__

/* Emit one message of class _class via say().
 * When _dump is true, a stack dump is produced in addition.
 * Notice that _class is expanded twice.
 */
#define _BRICK_MSG(_class, _dump, _fmt, _args...)			\
	do {								\
		struct lamport_time _s_now;				\
		struct lamport_time _l_now;				\
									\
		get_lamport(&_s_now, &_l_now);				\
		say(_class, _BRICK_FMT(_fmt, _class), ##_args);		\
		if (_dump) dump_stack();				\
	} while (0)

/* Report an error (including stack dump) when _cond is true,
 * otherwise only an informational message.
 * _cond is evaluated exactly once, so conditions with side effects
 * or expensive conditions behave as expected.
 */
#define BRICK_COND_ERR(_cond, _fmt, _args...)				\
	do {								\
		const bool _brick_cond = !!(_cond);			\
									\
		_BRICK_MSG(_brick_cond ? SAY_ERROR : SAY_INFO,		\
			   _brick_cond, _fmt, ##_args);			\
	} while (0)

#define BRICK_ERR(_fmt, _args...) _BRICK_MSG(SAY_ERROR, true,  _fmt, ##_args)
#define BRICK_WRN(_fmt, _args...) _BRICK_MSG(SAY_WARN,  false, _fmt, ##_args)
#define BRICK_INF(_fmt, _args...) _BRICK_MSG(SAY_INFO,  false, _fmt, ##_args)

#else /* CONFIG_MARS_DEBUG_DEVEL_VIA_SAY */

/* empty macros, as far as necessary */
#define _BRICK_FMT(_args...) /*empty*/
#define _BRICK_MSG(_args...) /*empty*/
#define BRICK_COND_ERR(_args...) /*empty*/
#define BRICK_ERR(_args...) /*empty*/
#define BRICK_WRN(_args...) /*empty*/
#define BRICK_INF(_args...) /*empty*/

#endif /* CONFIG_MARS_DEBUG_DEVEL_VIA_SAY */
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
// limit handling
|
|
|
|
#include <linux/swap.h>
|
|
|
|
long long brick_global_memavail = 0;
|
|
EXPORT_SYMBOL_GPL(brick_global_memavail);
|
|
long long brick_global_memlimit = 0;
|
|
EXPORT_SYMBOL_GPL(brick_global_memlimit);
|
|
atomic64_t brick_global_block_used = ATOMIC64_INIT(0);
|
|
EXPORT_SYMBOL_GPL(brick_global_block_used);
|
|
|
|
static
|
|
void get_total_ram(void)
|
|
{
|
|
struct sysinfo i = {};
|
|
|
|
si_meminfo(&i);
|
|
brick_global_memavail = (long long)i.totalram * (PAGE_SIZE / 1024);
|
|
BRICK_INF("total RAM = %lld [KiB]\n", brick_global_memavail);
|
|
}
|
|
|
|
/* Use the safe msleep_interruptible() from the upstream kernel.
|
|
* In addition to a linear backoff algorithms for dynamic
|
|
* CPU giveup, we allow negative arguments at the very first start
|
|
* of a polling-like cycle, more similar to cond_resched() behaviour.
|
|
*
|
|
* This is very useful in OOM-like situations, in order to allow
|
|
* other parts of the system to recover their operations.
|
|
* CAVEAT: the sleeps are rather long, so please use this only
|
|
* in desperate situations (when other measures have failed).
|
|
*/
|
|
void msleep_backoff(int *ms)
|
|
{
|
|
if (*ms < 0) {
|
|
*ms = 0;
|
|
return;
|
|
}
|
|
|
|
flush_signals(current);
|
|
msleep_interruptible(*ms);
|
|
|
|
/* Normally, we add only 1 jiffie per round, speculating
|
|
* that this will catch practically all usual cases in
|
|
* contemporary hardware stuff.
|
|
* Only when this speculation has proven wrong, we
|
|
* accelerate the linear slope somewhat.
|
|
*/
|
|
if (*ms < 100)
|
|
*ms += 1000 / HZ;
|
|
else if (*ms < 1000)
|
|
*ms += 10;
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
// small memory allocation (use this only for len < PAGE_SIZE)
|
|
|
|
#ifdef BRICK_DEBUG_MEM
/* number of live kmalloc()ed small allocations */
static atomic_t phys_mem_alloc = ATOMIC_INIT(0);
/* number of live "small" allocations redirected to the block allocator */
static atomic_t mem_redirect_alloc = ATOMIC_INIT(0);
/* per allocating source line: live allocations, frees, last length */
static atomic_t mem_count[BRICK_DEBUG_MEM] = {};
static atomic_t mem_free[BRICK_DEBUG_MEM] = {};
static int mem_len[BRICK_DEBUG_MEM] = {};
/* 4 ints of header (magic, len, line, magic) plus 2 ints of trailer */
#define PLUS_SIZE (6 * sizeof(int))
#else
/* header only; its first int stores the requested length */
#define PLUS_SIZE (2 * sizeof(int))
#endif
|
|
|
|
static inline
|
|
void *__brick_mem_alloc(int len)
|
|
{
|
|
void *res;
|
|
if (len >= PAGE_SIZE) {
|
|
#ifdef BRICK_DEBUG_MEM
|
|
atomic_inc(&mem_redirect_alloc);
|
|
#endif
|
|
res = _brick_block_alloc(0, len, 0);
|
|
} else {
|
|
int ms = 0;
|
|
|
|
for (;;) {
|
|
res = kmalloc(len, GFP_BRICK);
|
|
if (likely(res))
|
|
break;
|
|
msleep_backoff(&ms);
|
|
}
|
|
#ifdef BRICK_DEBUG_MEM
|
|
atomic_inc(&phys_mem_alloc);
|
|
#endif
|
|
}
|
|
return res;
|
|
}
|
|
|
|
static inline
|
|
void __brick_mem_free(void *data, int len)
|
|
{
|
|
if (len >= PAGE_SIZE) {
|
|
_brick_block_free(data, len, 0);
|
|
#ifdef BRICK_DEBUG_MEM
|
|
atomic_dec(&mem_redirect_alloc);
|
|
#endif
|
|
} else {
|
|
kfree(data);
|
|
#ifdef BRICK_DEBUG_MEM
|
|
atomic_dec(&phys_mem_alloc);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void *_brick_mem_alloc(int len, int line)
|
|
{
|
|
void *res;
|
|
#ifdef CONFIG_MARS_DEBUG
|
|
might_sleep();
|
|
#endif
|
|
|
|
res = __brick_mem_alloc(len + PLUS_SIZE);
|
|
|
|
if (likely(res)) {
|
|
#ifdef BRICK_DEBUG_MEM
|
|
if (unlikely(line < 0))
|
|
line = 0;
|
|
else if (unlikely(line >= BRICK_DEBUG_MEM))
|
|
line = BRICK_DEBUG_MEM - 1;
|
|
INT_ACCESS(res, 0 * sizeof(int)) = MAGIC_MEM1;
|
|
INT_ACCESS(res, 1 * sizeof(int)) = len;
|
|
INT_ACCESS(res, 2 * sizeof(int)) = line;
|
|
INT_ACCESS(res, 3 * sizeof(int)) = MAGIC_MEM2;
|
|
res += 4 * sizeof(int);
|
|
INT_ACCESS(res, len + 0 * sizeof(int)) = MAGIC_MEND1;
|
|
INT_ACCESS(res, len + 1 * sizeof(int)) = MAGIC_MEND2;
|
|
atomic_inc(&mem_count[line]);
|
|
mem_len[line] = len;
|
|
#else
|
|
INT_ACCESS(res, 0 * sizeof(int)) = len;
|
|
res += PLUS_SIZE;
|
|
#endif
|
|
}
|
|
return res;
|
|
}
|
|
EXPORT_SYMBOL_GPL(_brick_mem_alloc);
|
|
|
|
void _brick_mem_free(void *data, int cline)
|
|
{
|
|
#ifdef BRICK_DEBUG_MEM
|
|
void *test = data - 4 * sizeof(int);
|
|
int magic1= INT_ACCESS(test, 0 * sizeof(int));
|
|
int len = INT_ACCESS(test, 1 * sizeof(int));
|
|
int line = INT_ACCESS(test, 2 * sizeof(int));
|
|
int magic2= INT_ACCESS(test, 3 * sizeof(int));
|
|
if (unlikely(magic1 != MAGIC_MEM1)) {
|
|
BRICK_ERR("line %d memory corruption: magix1 %08x != %08x, len = %d\n", cline, magic1, MAGIC_MEM1, len);
|
|
return;
|
|
}
|
|
if (unlikely(magic2 != MAGIC_MEM2)) {
|
|
BRICK_ERR("line %d memory corruption: magix2 %08x != %08x, len = %d\n", cline, magic2, MAGIC_MEM2, len);
|
|
return;
|
|
}
|
|
if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
|
|
BRICK_ERR("line %d memory corruption: alloc line = %d, len = %d\n", cline, line, len);
|
|
return;
|
|
}
|
|
INT_ACCESS(test, 0) = 0xffffffff;
|
|
magic1 = INT_ACCESS(data, len + 0 * sizeof(int));
|
|
if (unlikely(magic1 != MAGIC_MEND1)) {
|
|
BRICK_ERR("line %d memory corruption: magix1 %08x != %08x, len = %d\n", cline, magic1, MAGIC_MEND1, len);
|
|
return;
|
|
}
|
|
magic2 = INT_ACCESS(data, len + 1 * sizeof(int));
|
|
if (unlikely(magic2 != MAGIC_MEND2)) {
|
|
BRICK_ERR("line %d memory corruption: magix2 %08x != %08x, len = %d\n", cline, magic2, MAGIC_MEND2, len);
|
|
return;
|
|
}
|
|
INT_ACCESS(data, len) = 0xffffffff;
|
|
atomic_dec(&mem_count[line]);
|
|
atomic_inc(&mem_free[line]);
|
|
#else
|
|
void *test = data - PLUS_SIZE;
|
|
int len = INT_ACCESS(test, 0 * sizeof(int));
|
|
#endif
|
|
data = test;
|
|
__brick_mem_free(data, len + PLUS_SIZE);
|
|
}
|
|
EXPORT_SYMBOL_GPL(_brick_mem_free);
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
// string memory allocation
|
|
|
|
/* STRING_PLUS is the number of bookkeeping bytes added to each
 * string allocation:
 *  - strong debugging: 3 ints of header plus a long textual canary
 *  - plain debugging:  3 ints of header plus 1 int of end magic
 *  - otherwise:        nothing
 */
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
# define STRING_CANARY							\
	"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"	\
	"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"	\
	"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"	\
	"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"	\
	"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"	\
	"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"	\
	"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"	\
	"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"	\
	"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"	\
	" BUILDTAG = " BUILDTAG						\
	" BUILDHOST = " BUILDHOST					\
	" BUILDDATE = " BUILDDATE					\
	" FILE = " __FILE__						\
	" VERSION = " __VERSION__					\
	" xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx STRING_error xxx\n"
# define STRING_PLUS (sizeof(int) * 3 + sizeof(STRING_CANARY))
#elif defined(BRICK_DEBUG_MEM)
# define STRING_PLUS (sizeof(int) * 4)
#else
# define STRING_PLUS 0
#endif
|
|
|
|
#ifdef BRICK_DEBUG_MEM
/* number of live string allocations */
static atomic_t phys_string_alloc = ATOMIC_INIT(0);
/* per allocating source line: live strings and completed frees */
static atomic_t string_count[BRICK_DEBUG_MEM] = {};
static atomic_t string_free[BRICK_DEBUG_MEM] = {};
#endif
|
|
|
|
char *_brick_string_alloc(int len, int line)
|
|
{
|
|
int ms = 0;
|
|
char *res;
|
|
|
|
#ifdef CONFIG_MARS_DEBUG
|
|
might_sleep();
|
|
if (unlikely(len > PAGE_SIZE)) {
|
|
BRICK_WRN("line = %d string too long: len = %d\n", line, len);
|
|
}
|
|
#endif
|
|
if (len <= 0) {
|
|
len = BRICK_STRING_LEN;
|
|
}
|
|
|
|
for (;;) {
|
|
res = kzalloc(len + STRING_PLUS, GFP_BRICK);
|
|
if (likely(res))
|
|
break;
|
|
msleep_backoff(&ms);
|
|
}
|
|
|
|
#ifdef BRICK_DEBUG_MEM
|
|
if (likely(res)) {
|
|
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
|
|
memset(res + 1, '?', len - 1);
|
|
#endif
|
|
atomic_inc(&phys_string_alloc);
|
|
if (unlikely(line < 0))
|
|
line = 0;
|
|
else if (unlikely(line >= BRICK_DEBUG_MEM))
|
|
line = BRICK_DEBUG_MEM - 1;
|
|
INT_ACCESS(res, 0) = MAGIC_STR;
|
|
INT_ACCESS(res, sizeof(int)) = len;
|
|
INT_ACCESS(res, sizeof(int) * 2) = line;
|
|
res += sizeof(int) * 3;
|
|
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
|
|
strcpy(res + len, STRING_CANARY);
|
|
#else
|
|
INT_ACCESS(res, len) = MAGIC_SEND;
|
|
#endif
|
|
atomic_inc(&string_count[line]);
|
|
}
|
|
#endif
|
|
return res;
|
|
}
|
|
EXPORT_SYMBOL_GPL(_brick_string_alloc);
|
|
|
|
/* Free a string obtained from _brick_string_alloc().
 *
 * With BRICK_DEBUG_MEM, the header and the end marker are verified
 * first. On corruption an error is reported and the memory is NOT
 * freed. A recorded length above PAGE_SIZE is only reported.
 * After verification, the end marker is invalidated (and the payload
 * is overwritten with '!' in case of CONFIG_MARS_DEBUG_MEM_STRONG).
 *
 * @data:  pointer as returned by _brick_string_alloc()
 * @cline: source line of the caller, used for error messages
 */
void _brick_string_free(const char *data, int cline)
{
#ifdef BRICK_DEBUG_MEM
	char *payload = (void*)data;
	int magic;
	int len;
	int line;

	/* step back to the start of the raw allocation */
	data -= sizeof(int) * 3;

	magic = INT_ACCESS(data, 0);
	if (unlikely(magic != MAGIC_STR)) {
		BRICK_ERR("cline %d stringmem corruption: magix %08x != %08x\n", cline, magic, MAGIC_STR);
		return;
	}

	len = INT_ACCESS(data, sizeof(int));
	line = INT_ACCESS(data, sizeof(int) * 2);
	if (unlikely(len <= 0)) {
		BRICK_ERR("cline %d stringmem corruption: line = %d len = %d\n", cline, line, len);
		return;
	}
	if (unlikely(len > PAGE_SIZE)) {
		BRICK_ERR("cline %d string too long: line = %d len = %d string='%s'\n", cline, line, len, payload);
	}
	if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
		BRICK_ERR("cline %d stringmem corruption: line = %d (len = %d)\n", cline, line, len);
		return;
	}

#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
	if (unlikely(strcmp(payload + len, STRING_CANARY))) {
		BRICK_ERR("cline %d stringmem corruption: bad canary '%s', line = %d len = %d\n",
			  cline, STRING_CANARY, line, len);
		return;
	}
	/* destroy the canary and the old contents */
	payload[len]--;
	memset(payload, '!', len);
#else
	magic = INT_ACCESS(payload, len);
	if (unlikely(magic != MAGIC_SEND)) {
		BRICK_ERR("cline %d stringmem corruption: end_magix %08x != %08x, line = %d len = %d\n",
			  cline, magic, MAGIC_SEND, line, len);
		return;
	}
	INT_ACCESS(payload, len) = 0xffffffff;
#endif

	atomic_dec(&string_count[line]);
	atomic_inc(&string_free[line]);
	atomic_dec(&phys_string_alloc);
#endif
	kfree(data);
}
EXPORT_SYMBOL_GPL(_brick_string_free);
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
// block memory allocation
|
|
|
|
static
|
|
int len2order(int len)
|
|
{
|
|
int order = 0;
|
|
|
|
if (unlikely(len <= 0)) {
|
|
BRICK_ERR("trying to use %d bytes\n", len);
|
|
return 0;
|
|
}
|
|
|
|
while ((PAGE_SIZE << order) < len)
|
|
order++;
|
|
|
|
if (unlikely(order > BRICK_MAX_ORDER)) {
|
|
BRICK_ERR("trying to use %d bytes (oder = %d, max = %d)\n", len, order, BRICK_MAX_ORDER);
|
|
return BRICK_MAX_ORDER;
|
|
}
|
|
return order;
|
|
}
|
|
|
|
#ifdef CONFIG_MARS_MEM_PREALLOC
/* All of these are indexed by page order. */

/* blocks currently handed out by _brick_block_alloc() */
static atomic_t _alloc_count[BRICK_MAX_ORDER+1] = {};
/* exported snapshot of _alloc_count */
int brick_mem_alloc_count[BRICK_MAX_ORDER+1] = {};
EXPORT_SYMBOL_GPL(brick_mem_alloc_count);
/* highest value of _alloc_count observed at allocation time */
int brick_mem_alloc_max[BRICK_MAX_ORDER+1] = {};
EXPORT_SYMBOL_GPL(brick_mem_alloc_max);
/* fill level up to which freed blocks are kept in the freelist */
int brick_mem_freelist_max[BRICK_MAX_ORDER+1] = {};
EXPORT_SYMBOL_GPL(brick_mem_freelist_max);
#endif
|
|
|
|
#ifdef BRICK_DEBUG_MEM
/* number of page blocks currently obtained from the kernel */
static atomic_t phys_block_alloc = ATOMIC_INIT(0);

// indexed by line
static atomic_t block_count[BRICK_DEBUG_MEM] = {};	/* live blocks */
static atomic_t block_free[BRICK_DEBUG_MEM] = {};	/* completed frees */
static int block_len[BRICK_DEBUG_MEM] = {};		/* last requested length */

// indexed by order
static atomic_t op_count[BRICK_MAX_ORDER+1] = {};	/* allocation requests */
static atomic_t raw_count[BRICK_MAX_ORDER+1] = {};	/* blocks held from the kernel */
static int alloc_line[BRICK_MAX_ORDER+1] = {};		/* line of the last request */
static int alloc_len[BRICK_MAX_ORDER+1] = {};		/* length of the last request */
#endif
|
|
|
|
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
|
|
|
|
#define MAX_INFO_LISTS 1024
|
|
|
|
#define INFO_LIST_HASH(addr) ((unsigned long)(addr) / (PAGE_SIZE * 2) % MAX_INFO_LISTS)
|
|
|
|
struct mem_block_info {
|
|
struct list_head inf_head;
|
|
void *inf_data;
|
|
int inf_len;
|
|
int inf_line;
|
|
bool inf_used;
|
|
};
|
|
|
|
static struct list_head inf_anchor[MAX_INFO_LISTS];
|
|
static rwlock_t inf_lock[MAX_INFO_LISTS];
|
|
|
|
static
|
|
void _new_block_info(void *data, int len, int cline)
|
|
{
|
|
struct mem_block_info *inf;
|
|
int ms = 0;
|
|
unsigned int hash;
|
|
unsigned long flags;
|
|
|
|
for (;;) {
|
|
inf = kmalloc(sizeof(struct mem_block_info), GFP_BRICK);
|
|
if (likely(inf))
|
|
break;
|
|
msleep_backoff(&ms);
|
|
}
|
|
inf->inf_data = data;
|
|
inf->inf_len = len;
|
|
inf->inf_line = cline;
|
|
inf->inf_used = true;
|
|
|
|
hash = INFO_LIST_HASH(data);
|
|
|
|
write_lock_irqsave(&inf_lock[hash], flags);
|
|
list_add(&inf->inf_head, &inf_anchor[hash]);
|
|
write_unlock_irqrestore(&inf_lock[hash], flags);
|
|
}
|
|
|
|
static
|
|
struct mem_block_info *_find_block_info(void *data, bool remove)
|
|
{
|
|
struct mem_block_info *res = NULL;
|
|
struct list_head *tmp;
|
|
unsigned int hash = INFO_LIST_HASH(data);
|
|
unsigned long flags;
|
|
|
|
if (remove)
|
|
write_lock_irqsave(&inf_lock[hash], flags);
|
|
else
|
|
read_lock_irqsave(&inf_lock[hash], flags);
|
|
for (tmp = inf_anchor[hash].next; tmp != &inf_anchor[hash]; tmp = tmp->next) {
|
|
struct mem_block_info *inf = container_of(tmp, struct mem_block_info, inf_head);
|
|
if (inf->inf_data != data)
|
|
continue;
|
|
if (remove)
|
|
list_del_init(tmp);
|
|
res = inf;
|
|
break;
|
|
}
|
|
if (remove)
|
|
write_unlock_irqrestore(&inf_lock[hash], flags);
|
|
else
|
|
read_unlock_irqrestore(&inf_lock[hash], flags);
|
|
return res;
|
|
}
|
|
|
|
#endif // CONFIG_MARS_DEBUG_MEM_STRONG
|
|
|
|
static inline
|
|
void *__brick_block_alloc(gfp_t gfp, int order, int cline)
|
|
{
|
|
void *res;
|
|
int ms = 0;
|
|
|
|
for (;;) {
|
|
#ifdef USE_KERNEL_PAGES
|
|
res = (void*)__get_free_pages(gfp, order);
|
|
#else
|
|
res = __vmalloc(PAGE_SIZE << order, gfp, PAGE_KERNEL_IO);
|
|
#endif
|
|
if (likely(res))
|
|
break;
|
|
msleep_backoff(&ms);
|
|
}
|
|
|
|
if (likely(res)) {
|
|
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
|
|
_new_block_info(res, PAGE_SIZE << order, cline);
|
|
#endif
|
|
#ifdef BRICK_DEBUG_MEM
|
|
atomic_inc(&phys_block_alloc);
|
|
atomic_inc(&raw_count[order]);
|
|
#endif
|
|
atomic64_add((PAGE_SIZE/1024) << order, &brick_global_block_used);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
static inline
|
|
void __brick_block_free(void *data, int order, int cline)
|
|
{
|
|
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
|
|
struct mem_block_info *inf = _find_block_info(data, true);
|
|
if (likely(inf)) {
|
|
int inf_len = inf->inf_len;
|
|
int inf_line = inf->inf_line;
|
|
kfree(inf);
|
|
if (unlikely(inf_len != (PAGE_SIZE << order))) {
|
|
BRICK_ERR("line %d: address %p: bad freeing size %d (correct should be %d, previous line = %d)\n", cline, data, (int)(PAGE_SIZE << order), inf_len, inf_line);
|
|
goto err;
|
|
}
|
|
} else {
|
|
BRICK_ERR("line %d: trying to free non-existent address %p (order = %d)\n", cline, data, order);
|
|
goto err;
|
|
}
|
|
#endif
|
|
#ifdef USE_KERNEL_PAGES
|
|
__free_pages(virt_to_page((unsigned long)data), order);
|
|
#else
|
|
vfree(data);
|
|
#endif
|
|
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
|
|
err:
|
|
#endif
|
|
#ifdef BRICK_DEBUG_MEM
|
|
atomic_dec(&phys_block_alloc);
|
|
atomic_dec(&raw_count[order]);
|
|
#endif
|
|
atomic64_sub((PAGE_SIZE/1024) << order, &brick_global_block_used);
|
|
}
|
|
|
|
#ifdef CONFIG_MARS_MEM_PREALLOC
|
|
int brick_allow_freelist = 1;
|
|
EXPORT_SYMBOL_GPL(brick_allow_freelist);
|
|
|
|
int brick_pre_reserve[BRICK_MAX_ORDER+1] = {};
|
|
EXPORT_SYMBOL_GPL(brick_pre_reserve);
|
|
|
|
/* Note: we have no separate lists per CPU.
|
|
* This should not hurt because the freelists are only used
|
|
* for higher-order pages which should be rather low-frequency.
|
|
*/
|
|
static spinlock_t freelist_lock[BRICK_MAX_ORDER+1];
|
|
static void *brick_freelist[BRICK_MAX_ORDER+1] = {};
|
|
static atomic_t freelist_count[BRICK_MAX_ORDER+1] = {};
|
|
|
|
static
|
|
void *_get_free(int order, int cline)
|
|
{
|
|
void *data;
|
|
unsigned long flags;
|
|
|
|
traced_lock(&freelist_lock[order], flags);
|
|
data = brick_freelist[order];
|
|
if (likely(data)) {
|
|
void *next = *(void**)data;
|
|
#ifdef BRICK_DEBUG_MEM // check for corruptions
|
|
long pattern = *(((long*)data)+1);
|
|
void *copy = *(((void**)data)+2);
|
|
if (unlikely(pattern != 0xf0f0f0f0f0f0f0f0 || next != copy)) { // found a corruption
|
|
// prevent further trouble by leaving a memleak
|
|
brick_freelist[order] = NULL;
|
|
traced_unlock(&freelist_lock[order], flags);
|
|
BRICK_ERR("line %d:freelist corruption at %p (pattern = %lx next %p != %p, murdered = %d), order = %d\n",
|
|
cline, data, pattern, next, copy, atomic_read(&freelist_count[order]), order);
|
|
return NULL;
|
|
}
|
|
#endif
|
|
brick_freelist[order] = next;
|
|
atomic_dec(&freelist_count[order]);
|
|
}
|
|
traced_unlock(&freelist_lock[order], flags);
|
|
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
|
|
if (data) {
|
|
struct mem_block_info *inf = _find_block_info(data, false);
|
|
if (likely(inf)) {
|
|
if (unlikely(inf->inf_len != (PAGE_SIZE << order))) {
|
|
BRICK_ERR("line %d: address %p: bad freelist size %d (correct should be %d, previous line = %d)\n",
|
|
cline, data, (int)(PAGE_SIZE << order), inf->inf_len, inf->inf_line);
|
|
}
|
|
inf->inf_line = cline;
|
|
inf->inf_used = true;
|
|
} else {
|
|
BRICK_ERR("line %d: freelist address %p is invalid (order = %d)\n", cline, data, order);
|
|
}
|
|
}
|
|
#endif
|
|
return data;
|
|
}
|
|
|
|
static
|
|
void _put_free(void *data, int order)
|
|
{
|
|
void *next;
|
|
unsigned long flags;
|
|
|
|
#ifdef BRICK_DEBUG_MEM // fill with pattern
|
|
memset(data, 0xf0, PAGE_SIZE << order);
|
|
#endif
|
|
|
|
traced_lock(&freelist_lock[order], flags);
|
|
next = brick_freelist[order];
|
|
*(void**)data = next;
|
|
#ifdef BRICK_DEBUG_MEM // insert redundant copy for checking
|
|
*(((void**)data)+2) = next;
|
|
#endif
|
|
brick_freelist[order] = data;
|
|
traced_unlock(&freelist_lock[order], flags);
|
|
atomic_inc(&freelist_count[order]);
|
|
}
|
|
|
|
static
|
|
void _free_all(void)
|
|
{
|
|
int order;
|
|
for (order = BRICK_MAX_ORDER; order >= 0; order--) {
|
|
for (;;) {
|
|
void *data = _get_free(order, __LINE__);
|
|
if (!data)
|
|
break;
|
|
__brick_block_free(data, order, __LINE__);
|
|
}
|
|
}
|
|
}
|
|
|
|
int brick_mem_reserve(void)
|
|
{
|
|
int order;
|
|
int status = 0;
|
|
|
|
for (order = BRICK_MAX_ORDER; order >= 0; order--) {
|
|
int max = brick_pre_reserve[order];
|
|
int i;
|
|
|
|
brick_mem_freelist_max[order] += max;
|
|
BRICK_INF("preallocating %d at order %d (new maxlevel = %d)\n", max, order, brick_mem_freelist_max[order]);
|
|
|
|
max = brick_mem_freelist_max[order] - atomic_read(&freelist_count[order]);
|
|
if (max >= 0) {
|
|
for (i = 0; i < max; i++) {
|
|
void *data = __brick_block_alloc(GFP_KERNEL, order, __LINE__);
|
|
|
|
_put_free(data, order);
|
|
}
|
|
} else {
|
|
for (i = 0; i < -max; i++) {
|
|
void *data = _get_free(order, __LINE__);
|
|
if (likely(data)) {
|
|
__brick_block_free(data, order, __LINE__);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return status;
|
|
}
|
|
#else
|
|
/* Variant for builds without CONFIG_MARS_MEM_PREALLOC:
 * nothing is reserved, only an informational message is emitted.
 * The argument is ignored. Always returns 0.
 *
 * NOTE(review): the parameter list differs from the
 * CONFIG_MARS_MEM_PREALLOC variant, which takes no arguments.
 * Please check against the prototype in the header.
 */
int brick_mem_reserve(struct mem_reservation *r)
{
	BRICK_INF("preallocation is not compiled in\n");
	return 0;
}
|
|
#endif
|
|
EXPORT_SYMBOL_GPL(brick_mem_reserve);
|
|
|
|
/* Allocate a block of at least len bytes, backed by whole pages.
 *
 * @pos:  not used by this implementation
 * @len:  requested size in bytes
 * @line: source line of the caller, used for statistics and debugging
 *
 * Returns the block, or NULL when the request (including debugging
 * overhead) does not fit into a block of order BRICK_MAX_ORDER.
 *
 * With CONFIG_MARS_MEM_PREALLOC, the freelist of the matching order
 * is tried first before asking the kernel.
 * With BRICK_DEBUG_MEM, additional pages are allocated for guards:
 *  - order > 1:  the first page holds a header (magic, line, len),
 *    the returned pointer starts at the second page, and an end
 *    magic is written directly behind the len payload bytes.
 *  - order == 1: the record (magic, line, len) is stored at the
 *    start of the second page; the returned pointer is the block start.
 */
void *_brick_block_alloc(loff_t pos, int len, int line)
{
	void *data;
#ifdef CONFIG_MARS_MEM_PREALLOC
	int count;
#endif
#ifdef BRICK_DEBUG_MEM
#ifdef BRICK_DEBUG_ORDER0
	const int plus0 = PAGE_SIZE;
#else
	const int plus0 = 0;
#endif
	const int plus = len <= PAGE_SIZE ? plus0 : PAGE_SIZE * 2;
#else
	const int plus = 0;
#endif
	int order;

	/* len2order() silently clamps to BRICK_MAX_ORDER, so checking
	 * its result cannot detect oversized requests. Check the size
	 * explicitly: otherwise the caller would get a block which is
	 * smaller than requested.
	 */
	if (unlikely(len > 0 &&
		     (unsigned long)len + plus > (PAGE_SIZE << BRICK_MAX_ORDER))) {
		BRICK_ERR("trying to allocate %d bytes (max = %d)\n", len, (int)(PAGE_SIZE << BRICK_MAX_ORDER));
		return NULL;
	}

	order = len2order(len + plus);

#ifdef CONFIG_MARS_DEBUG
	might_sleep();
#endif

#ifdef CONFIG_MARS_MEM_PREALLOC
	count = atomic_add_return(1, &_alloc_count[order]);
	brick_mem_alloc_count[order] = count;
	if (count > brick_mem_alloc_max[order])
		brick_mem_alloc_max[order] = count;
#endif

#ifdef BRICK_DEBUG_MEM
	atomic_inc(&op_count[order]);
	// statistics
	alloc_line[order] = line;
	alloc_len[order] = len;
#endif

#ifdef CONFIG_MARS_MEM_PREALLOC
	/* Dynamic increase of limits, in order to reduce
	 * fragmentation on higher-order pages.
	 * This comes on cost of higher memory usage.
	 */
	if (order > 0 && count > brick_mem_freelist_max[order])
		brick_mem_freelist_max[order] = count;
#endif

#ifdef CONFIG_MARS_MEM_PREALLOC
	data = _get_free(order, line);
	if (!data)
#endif
		data = __brick_block_alloc(GFP_BRICK, order, line);

#ifdef BRICK_DEBUG_MEM
	if (likely(data) && order > 0) {
		if (unlikely(line < 0))
			line = 0;
		else if (unlikely(line >= BRICK_DEBUG_MEM))
			line = BRICK_DEBUG_MEM - 1;
		atomic_inc(&block_count[line]);
		block_len[line] = len;
		if (order > 1) {
			/* header page in front, end magic behind the payload */
			INT_ACCESS(data, 0 * sizeof(int)) = MAGIC_BLOCK;
			INT_ACCESS(data, 1 * sizeof(int)) = line;
			INT_ACCESS(data, 2 * sizeof(int)) = len;
			data += PAGE_SIZE;
			INT_ACCESS(data, -1 * sizeof(int)) = MAGIC_BLOCK;
			INT_ACCESS(data, len) = MAGIC_BEND;
		} else if (order == 1) {
			/* record at the start of the second page */
			INT_ACCESS(data, PAGE_SIZE + 0 * sizeof(int)) = MAGIC_BLOCK;
			INT_ACCESS(data, PAGE_SIZE + 1 * sizeof(int)) = line;
			INT_ACCESS(data, PAGE_SIZE + 2 * sizeof(int)) = len;
		}
	}
#endif
	return data;
}
EXPORT_SYMBOL_GPL(_brick_block_alloc);
|
|
|
|
/* Free a block obtained from _brick_block_alloc().
 *
 * @data:  pointer as returned by _brick_block_alloc()
 * @len:   the same length as passed at allocation time
 * @cline: source line of the caller, used for messages and tracking
 *
 * The page order is recomputed from len in the same way as at
 * allocation time.
 * With CONFIG_MARS_DEBUG_MEM_STRONG, the tracking record is checked
 * for a matching size and for double frees.
 * With BRICK_DEBUG_MEM, the guard records written at allocation time
 * are verified. On any detected inconsistency, an error is reported
 * and the block is NOT freed.
 * With CONFIG_MARS_MEM_PREALLOC, blocks of order > 0 are kept in the
 * freelist as long as its limit permits; otherwise the block is given
 * back to the kernel.
 */
void _brick_block_free(void *data, int len, int cline)
{
	int order;
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
	struct mem_block_info *inf;
	char *real_data;
#endif
#ifdef BRICK_DEBUG_MEM
	int prev_line = 0;
#ifdef BRICK_DEBUG_ORDER0
	const int plus0 = PAGE_SIZE;
#else
	const int plus0 = 0;
#endif
	const int plus = len <= PAGE_SIZE ? plus0 : PAGE_SIZE * 2;
#else
	const int plus = 0;
#endif

	order = len2order(len + plus);
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
	/* for order > 1 the block really starts at the header page */
	real_data = data;
	if (order > 1)
		real_data -= PAGE_SIZE;
	inf = _find_block_info(real_data, false);
	if (likely(inf)) {
		prev_line = inf->inf_line;
		if (unlikely(inf->inf_len != (PAGE_SIZE << order))) {
			BRICK_ERR("line %d: address %p: bad freeing size %d (correct should be %d, previous line = %d)\n",
				  cline, data, (int)(PAGE_SIZE << order), inf->inf_len, prev_line);
			return;
		}
		if (unlikely(!inf->inf_used)) {
			BRICK_ERR("line %d: address %p: double freeing (previous line = %d)\n", cline, data, prev_line);
			return;
		}
		inf->inf_line = cline;
		inf->inf_used = false;
	} else {
		BRICK_ERR("line %d: trying to free non-existent address %p (order = %d)\n", cline, data, order);
		return;
	}
#endif
#ifdef BRICK_DEBUG_MEM
	(void)prev_line; /* silence annoying compiler warning */
	if (order > 1) {
		void *test = data - PAGE_SIZE;
		int magic = INT_ACCESS(test, 0);
		int line = INT_ACCESS(test, sizeof(int));
		int oldlen = INT_ACCESS(test, sizeof(int)*2);
		int magic1 = INT_ACCESS(data, -1 * sizeof(int));
		int magic2;

		if (unlikely(magic1 != MAGIC_BLOCK)) {
			BRICK_ERR("line %d memory corruption: %p magix1 %08x != %08x (previous line = %d)\n", cline, data, magic1, MAGIC_BLOCK, prev_line);
			return;
		}
		if (unlikely(magic != MAGIC_BLOCK)) {
			BRICK_ERR("line %d memory corruption: %p magix %08x != %08x (previous line = %d)\n", cline, data, magic, MAGIC_BLOCK, prev_line);
			return;
		}
		if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
			BRICK_ERR("line %d memory corruption %p: alloc line = %d (previous line = %d)\n", cline, data, line, prev_line);
			return;
		}
		if (unlikely(oldlen != len)) {
			BRICK_ERR("line %d memory corruption %p: len != oldlen (%d != %d, previous line = %d))\n", cline, data, len, oldlen, prev_line);
			return;
		}
		magic2 = INT_ACCESS(data, len);
		if (unlikely(magic2 != MAGIC_BEND)) {
			/* report the offending end magic, not the (valid) start magic */
			BRICK_ERR("line %d memory corruption %p: magix %08x != %08x (previous line = %d)\n", cline, data, magic2, MAGIC_BEND, prev_line);
			return;
		}
		/* invalidate both guards, continue with the real block start */
		INT_ACCESS(test, 0) = 0xffffffff;
		INT_ACCESS(data, len) = 0xffffffff;
		data = test;
		atomic_dec(&block_count[line]);
		atomic_inc(&block_free[line]);
	} else if (order == 1) {
		void *test = data + PAGE_SIZE;
		int magic = INT_ACCESS(test, 0 * sizeof(int));
		int line = INT_ACCESS(test, 1 * sizeof(int));
		int oldlen = INT_ACCESS(test, 2 * sizeof(int));

		if (unlikely(magic != MAGIC_BLOCK)) {
			BRICK_ERR("line %d memory corruption %p: magix %08x != %08x (previous line = %d)\n", cline, data, magic, MAGIC_BLOCK, prev_line);
			return;
		}
		if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
			BRICK_ERR("line %d memory corruption %p: alloc line = %d (previous line = %d)\n", cline, data, line, prev_line);
			return;
		}
		if (unlikely(oldlen != len)) {
			BRICK_ERR("line %d memory corruption %p: len != oldlen (%d != %d, previous line = %d))\n", cline, data, len, oldlen, prev_line);
			return;
		}
		atomic_dec(&block_count[line]);
		atomic_inc(&block_free[line]);
	}
#endif /* BRICK_DEBUG_MEM */
#ifdef CONFIG_MARS_MEM_PREALLOC
	if (order > 0 && brick_allow_freelist && atomic_read(&freelist_count[order]) <= brick_mem_freelist_max[order]) {
		_put_free(data, order);
	} else
#endif
		__brick_block_free(data, order, cline);

#ifdef CONFIG_MARS_MEM_PREALLOC
	brick_mem_alloc_count[order] = atomic_dec_return(&_alloc_count[order]);
#endif
}
EXPORT_SYMBOL_GPL(_brick_block_free);
|
|
|
|
struct page *brick_iomap(void *data, int *offset, int *len)
|
|
{
|
|
int _offset = ((unsigned long)data) & (PAGE_SIZE-1);
|
|
struct page *page;
|
|
*offset = _offset;
|
|
if (*len > PAGE_SIZE - _offset) {
|
|
*len = PAGE_SIZE - _offset;
|
|
}
|
|
if (is_vmalloc_addr(data)) {
|
|
page = vmalloc_to_page(data);
|
|
} else {
|
|
page = virt_to_page(data);
|
|
}
|
|
return page;
|
|
}
|
|
EXPORT_SYMBOL_GPL(brick_iomap);
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
// module
|
|
|
|
#ifdef CONFIG_MARS_DEBUG_DEVEL_VIA_SAY
|
|
|
|
/* Report the allocation statistics (only with BRICK_DEBUG_MEM).
 *
 * First the per-order page statistics are shown (only with
 * CONFIG_MARS_MEM_PREALLOC), followed by the per-line statistics
 * of block, small memory, and string allocations. Only source
 * lines with a nonzero number of live allocations are listed.
 *
 * @final: when set, each summary is reported as an error in case
 *         live allocations are remaining, otherwise as info.
 */
void brick_mem_statistics(bool final)
{
#ifdef BRICK_DEBUG_MEM
	int idx;
	int total = 0;
	int sites = 0;

	BRICK_INF("======== page allocation:\n");
#ifdef CONFIG_MARS_MEM_PREALLOC
	for (idx = 0; idx <= BRICK_MAX_ORDER; idx++) {
		BRICK_INF("pages order = %2d "
			  "operations = %9d "
			  "freelist_count = %4d / %3d "
			  "raw_count = %5d "
			  "alloc_count = %5d "
			  "alloc_len = %5d "
			  "line = %5d "
			  "max_count = %5d\n",
			  idx,
			  atomic_read(&op_count[idx]),
			  atomic_read(&freelist_count[idx]),
			  brick_mem_freelist_max[idx],
			  atomic_read(&raw_count[idx]),
			  brick_mem_alloc_count[idx],
			  alloc_len[idx],
			  alloc_line[idx],
			  brick_mem_alloc_max[idx]);
	}
#endif
	/* blocks */
	for (idx = 0; idx < BRICK_DEBUG_MEM; idx++) {
		int in_use = atomic_read(&block_count[idx]);

		if (!in_use)
			continue;
		total += in_use;
		sites++;
		BRICK_INF("line %4d: "
			  "%6d allocated "
			  "(last size = %4d, freed = %6d)\n",
			  idx,
			  in_use,
			  block_len[idx],
			  atomic_read(&block_free[idx]));
	}
	BRICK_COND_ERR(final && total,
		       "======== %d block allocations in %d places (phys=%d)\n",
		       total, sites, atomic_read(&phys_block_alloc));

	/* small memory */
	total = 0;
	sites = 0;
	for (idx = 0; idx < BRICK_DEBUG_MEM; idx++) {
		int in_use = atomic_read(&mem_count[idx]);

		if (!in_use)
			continue;
		total += in_use;
		sites++;
		BRICK_INF("line %4d: "
			  "%6d allocated "
			  "(last size = %4d, freed = %6d)\n",
			  idx,
			  in_use,
			  mem_len[idx],
			  atomic_read(&mem_free[idx]));
	}
	BRICK_COND_ERR(final && total,
		       "======== %d memory allocations in %d places (phys=%d,redirect=%d)\n",
		       total, sites,
		       atomic_read(&phys_mem_alloc), atomic_read(&mem_redirect_alloc));

	/* strings */
	total = 0;
	sites = 0;
	for (idx = 0; idx < BRICK_DEBUG_MEM; idx++) {
		int in_use = atomic_read(&string_count[idx]);

		if (!in_use)
			continue;
		total += in_use;
		sites++;
		BRICK_INF("line %4d: "
			  "%6d allocated "
			  "(freed = %6d)\n",
			  idx,
			  in_use,
			  atomic_read(&string_free[idx]));
	}
	BRICK_COND_ERR(final && total,
		       "======== %d string allocations in %d places (phys=%d)\n",
		       total, sites, atomic_read(&phys_string_alloc));
#endif
}
EXPORT_SYMBOL_GPL(brick_mem_statistics);
|
|
|
|
#endif /* CONFIG_MARS_DEBUG_DEVEL_VIA_SAY */
|
|
|
|
// module init stuff
|
|
|
|
/* Module initialization of the memory layer:
 * initialize the freelist locks (CONFIG_MARS_MEM_PREALLOC) and the
 * block tracking tables (CONFIG_MARS_DEBUG_MEM_STRONG), then
 * determine the total RAM. Always returns 0.
 */
int __init init_brick_mem(void)
{
	int i;

#ifdef CONFIG_MARS_MEM_PREALLOC
	i = BRICK_MAX_ORDER;
	while (i >= 0) {
		spin_lock_init(&freelist_lock[i]);
		i--;
	}
#endif
#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
	i = 0;
	while (i < MAX_INFO_LISTS) {
		INIT_LIST_HEAD(&inf_anchor[i]);
		rwlock_init(&inf_lock[i]);
		i++;
	}
#else
	(void)i; /* the index may be unused in this configuration */
#endif

	get_total_ram();

	return 0;
}
|
|
|
|
/* Module cleanup of the memory layer:
 * drain the freelists (CONFIG_MARS_MEM_PREALLOC) and finally
 * report the statistics (CONFIG_MARS_DEBUG_DEVEL_VIA_SAY), where
 * remaining allocations are reported as errors.
 */
void exit_brick_mem(void)
{
	BRICK_INF("deallocating memory...\n");

#ifdef CONFIG_MARS_MEM_PREALLOC
	_free_all();
#endif

#ifdef CONFIG_MARS_DEBUG_DEVEL_VIA_SAY
	brick_mem_statistics(true);
#endif /* CONFIG_MARS_DEBUG_DEVEL_VIA_SAY */
}
|