mars/mars_generic.c

1572 lines
38 KiB
C

// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/utsname.h>
#define _STRATEGY
#include "mars.h"
#include "mars_client.h"
#include <linux/syscalls.h>
#include <linux/namei.h>
#include <linux/kthread.h>
#define SKIP_BIO false
/////////////////////////////////////////////////////////////////////////
// MARS-specific memory allocation
// Compile-time switches for mars_alloc() / mars_free():
// USE_KERNEL_PAGES selects __get_free_pages() / __free_pages();
// without it the code falls back to __vmalloc() / vfree().
#define USE_KERNEL_PAGES
// Largest page order handed out, i.e. at most (PAGE_SIZE << MARS_MAX_ORDER) bytes.
#define MARS_MAX_ORDER 8
// When enabled, the in-page offset of the file position is added to the buffer start.
//#define USE_OFFSET
// When enabled, freed blocks are cached in a per-order singly linked list.
//#define USE_INTERNAL_FREELIST
#ifdef USE_INTERNAL_FREELIST
// Head of the cached-block list for each order; the first word of a cached
// block holds the pointer to the next one.
void *mars_freelist[MARS_MAX_ORDER+1] = {};
// Number of blocks currently cached for each order.
atomic_t freelist_count[MARS_MAX_ORDER+1] = {};
#endif
/*
 * Allocate a data buffer of at least @len bytes.
 *
 * @pos: file position the buffer will be used for; only consulted when
 *       USE_OFFSET is enabled, in which case the returned pointer carries
 *       the same in-page offset as @pos.
 * @len: requested size in bytes, must be > 0.
 *
 * With USE_KERNEL_PAGES the smallest page order that fits the request is
 * used (at most MARS_MAX_ORDER). Returns NULL on bad size or allocation
 * failure. The buffer must be released with mars_free() using the same @len.
 *
 * NOTE(review): the optional internal freelist is manipulated without any
 * locking here -- confirm this is safe before enabling USE_INTERNAL_FREELIST.
 */
void *mars_alloc(loff_t pos, int len)
{
	int offset = 0;
	void *data;
#ifdef USE_KERNEL_PAGES
	int order = MARS_MAX_ORDER;
	/* PAGE_SIZE is unsigned long; keep an int copy so that it matches "%d" */
	int max_len = (int)(PAGE_SIZE << MARS_MAX_ORDER);
#endif

#ifdef USE_OFFSET
	offset = pos & (PAGE_SIZE-1);
#endif

#ifdef USE_KERNEL_PAGES
	/* The in-page offset also consumes space, so account for it in the limit. */
	if (unlikely(len <= 0 || len > max_len - offset)) {
		MARS_ERR("trying to allocate %d bytes (max = %d)\n", len, max_len - offset);
		return NULL;
	}

	len += offset;
	/* Shrink to the smallest order that still holds len bytes. */
	while (order > 0 && (PAGE_SIZE << (order-1)) >= len) {
		order--;
	}

#ifdef USE_INTERNAL_FREELIST
	data = mars_freelist[order];
	if (data) {
		/* pop the cached block; its first word links to the next one */
		mars_freelist[order] = *(void**)data;
		atomic_dec(&freelist_count[order]);
	} else
#endif
	data = (void*)__get_free_pages(GFP_MARS, order);
#else
	data = __vmalloc(len + offset, GFP_MARS, PAGE_KERNEL_IO);
#endif

	if (likely(data)) {
		data += offset;
	}
	return data;
}
EXPORT_SYMBOL_GPL(mars_alloc);
/*
 * Release a buffer obtained from mars_alloc().
 *
 * @data: pointer as returned by mars_alloc(); NULL is silently ignored.
 * @len:  the length that was passed to mars_alloc(); it is needed to
 *        recompute the page order of the allocation.
 */
void mars_free(void *data, int len)
{
	int offset = 0;
#ifdef USE_KERNEL_PAGES
	int order;
#endif

	if (!data)
		return;

#ifdef USE_OFFSET
	offset = ((unsigned long)data) & (PAGE_SIZE-1);
#endif
	/* step back to the real start of the allocation */
	data -= offset;

#ifdef USE_KERNEL_PAGES
	len += offset;
	/* same order computation as in mars_alloc() */
	for (order = MARS_MAX_ORDER; order > 0; order--) {
		if ((PAGE_SIZE << (order-1)) < len)
			break;
	}

#ifdef USE_INTERNAL_FREELIST
	if (order > 0 && atomic_read(&freelist_count[order]) < 500) {
		/* highest fill level reported so far, per order */
		static int high_water[MARS_MAX_ORDER+1] = {};
		int count;

		/* push the block onto the per-order cache */
		*(void**)data = mars_freelist[order];
		mars_freelist[order] = data;
		atomic_inc(&freelist_count[order]);

		count = atomic_read(&freelist_count[order]);
		if (count > high_water[order] + 50) {
			int k;

			high_water[order] = count;
			MARS_INF("now %d freelist members at order %d (len = %d)\n", count, order, len);
			for (k = 0; k <= MARS_MAX_ORDER; k++) {
				MARS_INF("  %d : %4d\n", k, atomic_read(&freelist_count[k]));
			}
		}
	} else
#endif
	__free_pages(virt_to_page((unsigned long)data), order);
#else
	vfree(data);
#endif
}
EXPORT_SYMBOL_GPL(mars_free);
/*
 * Translate a kernel virtual address into its struct page.
 *
 * @data:   address inside a buffer (vmalloc or directly mapped memory).
 * @offset: out: offset of @data within its page.
 * @len:    in/out: clamped so that the range does not cross the page end.
 *
 * Returns the page containing @data.
 */
struct page *mars_iomap(void *data, int *offset, int *len)
{
	int in_page = ((unsigned long)data) & (PAGE_SIZE-1);

	*offset = in_page;
	if (*len > PAGE_SIZE - in_page)
		*len = PAGE_SIZE - in_page;

	if (is_vmalloc_addr(data))
		return vmalloc_to_page(data);
	return virt_to_page(data);
}
EXPORT_SYMBOL_GPL(mars_iomap);
/////////////////////////////////////////////////////////////////////
// meta descriptions
// Field description table for struct mars_info.
// Each META_INI entry names one integer member; the empty entry terminates the table.
// NOTE(review): presumably consumed by generic (de)serialization code -- confirm in mars.h.
const struct meta mars_info_meta[] = {
META_INI(current_size, struct mars_info, FIELD_INT),
META_INI(transfer_order, struct mars_info, FIELD_INT),
META_INI(transfer_size, struct mars_info, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_info_meta);
// Field description table for struct mref_object.
// Lists the integer members that are described (including the nested
// _ref_cb.cb_error); the empty entry terminates the table.
const struct meta mars_mref_meta[] = {
META_INI(ref_pos, struct mref_object, FIELD_INT),
META_INI(ref_len, struct mref_object, FIELD_INT),
META_INI(ref_may_write, struct mref_object, FIELD_INT),
META_INI(ref_prio, struct mref_object, FIELD_INT),
META_INI(ref_timeout, struct mref_object, FIELD_INT),
META_INI(ref_total_size, struct mref_object, FIELD_INT),
META_INI(ref_flags, struct mref_object, FIELD_INT),
META_INI(ref_rw, struct mref_object, FIELD_INT),
META_INI(ref_id, struct mref_object, FIELD_INT),
META_INI(ref_skip_sync, struct mref_object, FIELD_INT),
META_INI(_ref_cb.cb_error, struct mref_object, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_mref_meta);
// Field description table for struct timespec (seconds and nanoseconds).
// Referenced as a sub-table by mars_kstat_meta; the empty entry terminates the table.
const struct meta mars_timespec_meta[] = {
META_INI(tv_sec, struct timespec, FIELD_INT),
META_INI(tv_nsec, struct timespec, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_timespec_meta);
// Field description table for struct kstat.
// The three timestamps are described via the nested mars_timespec_meta table;
// the empty entry terminates the table.
const struct meta mars_kstat_meta[] = {
META_INI(ino, struct kstat, FIELD_INT),
META_INI(mode, struct kstat, FIELD_INT),
META_INI(size, struct kstat, FIELD_INT),
META_INI_SUB(atime, struct kstat, mars_timespec_meta),
META_INI_SUB(mtime, struct kstat, mars_timespec_meta),
META_INI_SUB(ctime, struct kstat, mars_timespec_meta),
META_INI(blksize, struct kstat, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_kstat_meta);
// Field description table for struct mars_dent.
// Covers the name/path strings, classification integers, the old and new
// stat results (nested mars_kstat_meta), the old and new symlink targets,
// and the argument strings. Only d_argv[0..3] are listed explicitly.
// The empty entry terminates the table.
const struct meta mars_dent_meta[] = {
META_INI(d_name, struct mars_dent, FIELD_STRING),
META_INI(d_rest, struct mars_dent, FIELD_STRING),
META_INI(d_path, struct mars_dent, FIELD_STRING),
META_INI(d_namelen, struct mars_dent, FIELD_INT),
META_INI(d_pathlen, struct mars_dent, FIELD_INT),
META_INI(d_type, struct mars_dent, FIELD_INT),
META_INI(d_class, struct mars_dent, FIELD_INT),
META_INI(d_serial, struct mars_dent, FIELD_INT),
META_INI_SUB(new_stat,struct mars_dent, mars_kstat_meta),
META_INI_SUB(old_stat,struct mars_dent, mars_kstat_meta),
META_INI(new_link, struct mars_dent, FIELD_STRING),
META_INI(old_link, struct mars_dent, FIELD_STRING),
META_INI(d_args, struct mars_dent, FIELD_STRING),
META_INI(d_argv[0], struct mars_dent, FIELD_STRING),
META_INI(d_argv[1], struct mars_dent, FIELD_STRING),
META_INI(d_argv[2], struct mars_dent, FIELD_STRING),
META_INI(d_argv[3], struct mars_dent, FIELD_STRING),
{}
};
EXPORT_SYMBOL_GPL(mars_dent_meta);
/////////////////////////////////////////////////////////////////////
// tracing
#ifdef MARS_TRACING
/* Timestamp of the first logged trace; later traces are reported relative to it. */
unsigned long long start_trace_clock = 0;
EXPORT_SYMBOL_GPL(start_trace_clock);

/* Destination of the trace log and the current write position within it. */
struct file *mars_log_file = NULL;
loff_t mars_log_pos = 0;

/*
 * Append @len bytes from the kernel buffer @buf to the trace log file.
 *
 * Nothing is written when no log file is open or when there is no data.
 * Writers are serialized by a function-local lock. The result of
 * vfs_write() is ignored (logging is best effort).
 */
void _mars_log(char *buf, int len)
{
	static DECLARE_MUTEX(trace_lock);
	mm_segment_t oldfs;

	/* mars_log_file starts out as NULL and not every caller checks it */
	if (unlikely(!mars_log_file || !buf || len <= 0))
		return;

	/* allow a kernel buffer to be passed to vfs_write() */
	oldfs = get_fs();
	set_fs(get_ds());

	down(&trace_lock);
	vfs_write(mars_log_file, buf, len, &mars_log_pos);
	up(&trace_lock);

	set_fs(oldfs);
}
EXPORT_SYMBOL_GPL(_mars_log);
/*
 * printf-style front end to _mars_log().
 *
 * The message is formatted into a temporary PAGE_SIZE buffer; longer
 * messages are truncated. Silently does nothing when the buffer cannot
 * be allocated.
 */
void mars_log(const char *fmt, ...)
{
	char *buf = kmalloc(PAGE_SIZE, GFP_MARS);
	va_list args;
	int len;

	if (!buf)
		return;

	va_start(args, fmt);
	/* vscnprintf() returns the number of bytes really stored, whereas
	 * vsnprintf() returns the untruncated length, which would make
	 * _mars_log() read beyond the end of buf on long messages.
	 */
	len = vscnprintf(buf, PAGE_SIZE, fmt, args);
	va_end(args);

	_mars_log(buf, len);
	kfree(buf);
}
EXPORT_SYMBOL_GPL(mars_log);
/*
 * Record a trace point on @mref: the current cpu clock together with the
 * label @info. Only the pointer @info is stored, not a copy of the string.
 * Once MAX_TRACES entries have been recorded, further calls are ignored.
 */
void mars_trace(struct mref_object *mref, const char *info)
{
	int slot = mref->ref_traces;

	if (unlikely(slot >= MAX_TRACES))
		return;

	mref->ref_trace_stamp[slot] = cpu_clock(raw_smp_processor_id());
	mref->ref_trace_info[slot] = info;
	mref->ref_traces++;
}
EXPORT_SYMBOL_GPL(mars_trace);
/*
 * Write all trace points recorded on @mref as one line to the trace log
 * and reset the trace counter.
 *
 * The line starts with direction, position, length and total duration,
 * followed by one "label ; delta" pair per trace point (clock differences
 * divided by 1000). The first delta is relative to start_trace_clock,
 * which is latched from the first mref ever logged.
 *
 * Nothing is logged when no log file is open or no traces were recorded.
 * When the temporary buffer cannot be allocated, the function returns
 * without resetting the trace counter.
 */
void mars_log_trace(struct mref_object *mref)
{
	char *buf = kmalloc(PAGE_SIZE, GFP_MARS);
	unsigned long long old;
	unsigned long long diff;
	int i;
	int len;

	if (!buf) {
		return;
	}
	if (!mars_log_file || !mref->ref_traces) {
		goto done;
	}
	if (!start_trace_clock) {
		start_trace_clock = mref->ref_trace_stamp[0];
	}

	diff = mref->ref_trace_stamp[mref->ref_traces-1] - mref->ref_trace_stamp[0];

	/* scnprintf() returns what was really stored, so len always stays
	 * below PAGE_SIZE. With snprintf() a long line would push len past
	 * the buffer, making "PAGE_SIZE - len" wrap around and _mars_log()
	 * read out of bounds.
	 */
	len = scnprintf(buf, PAGE_SIZE, "%c ;%12lld ;%6d;%10llu", mref->ref_rw ? 'W' : 'R', mref->ref_pos, mref->ref_len, diff / 1000);

	old = start_trace_clock;
	for (i = 0; i < mref->ref_traces; i++) {
		diff = mref->ref_trace_stamp[i] - old;
		len += scnprintf(buf + len, PAGE_SIZE - len, " ; %s ;%10llu", mref->ref_trace_info[i], diff / 1000);
		old = mref->ref_trace_stamp[i];
	}
	len += scnprintf(buf + len, PAGE_SIZE - len, "\n");

	/* keep the log line-oriented even when the line had to be truncated */
	if (len > 0 && buf[len-1] != '\n')
		buf[len-1] = '\n';

	_mars_log(buf, len);

done:
	kfree(buf);
	mref->ref_traces = 0;
}
EXPORT_SYMBOL_GPL(mars_log_trace);
#endif // MARS_TRACING
/////////////////////////////////////////////////////////////////////
// some helpers
/*
 * stat() a path given as a kernel string.
 *
 * @use_lstat: when true, vfs_lstat() is used in place of vfs_stat().
 * Returns the status of the underlying vfs call.
 */
int mars_stat(const char *path, struct kstat *stat, bool use_lstat)
{
	mm_segment_t saved_fs = get_fs();
	int res;

	/* the vfs helpers expect user pointers; lift the address limit */
	set_fs(get_ds());
	res = use_lstat ? vfs_lstat((char*)path, stat)
			: vfs_stat((char*)path, stat);
	set_fs(saved_fs);

	return res;
}
EXPORT_SYMBOL_GPL(mars_stat);
/*
 * Create directory @path (kernel string) with mode 0700.
 * Returns the result of sys_mkdir().
 */
int mars_mkdir(const char *path)
{
	mm_segment_t saved_fs = get_fs();
	int res;

	set_fs(get_ds());
	res = sys_mkdir(path, 0700);
	set_fs(saved_fs);

	return res;
}
EXPORT_SYMBOL_GPL(mars_mkdir);
/*
 * Create or replace the symlink @newpath pointing to @oldpath.
 *
 * The link is first created under a temporary name (last path component
 * prefixed with ".tmp-") and then renamed onto @newpath, so that an
 * existing link is replaced by a single rename.
 *
 * @stamp: optional; when given, the link is chown'ed to @uid (best effort)
 *         and both its access and modification time are set to *stamp.
 * @uid:   owner to set, only used together with @stamp.
 *
 * Returns a negative error code on failure (-ENOMEM when the temporary
 * name cannot be built), otherwise the result of the final rename.
 */
int mars_symlink(const char *oldpath, const char *newpath, const struct timespec *stamp, uid_t uid)
{
	char *tmp = backskip_replace(newpath, '/', true, "/.tmp-");
	mm_segment_t oldfs;
	int status = -ENOMEM;

	if (unlikely(!tmp))
		goto done;

	oldfs = get_fs();
	set_fs(get_ds());

	/* remove leftovers from a previous, interrupted attempt */
	(void)sys_unlink(tmp);

	status = sys_symlink(oldpath, tmp);
	if (status < 0)
		goto cleanup; /* do not let the steps below mask this error */

	if (stamp) {
		struct timespec times[2];

		(void)sys_lchown(tmp, uid, 0);
		memcpy(&times[0], stamp, sizeof(struct timespec));
		memcpy(&times[1], stamp, sizeof(struct timespec));
		status = do_utimes(AT_FDCWD, tmp, times, AT_SYMLINK_NOFOLLOW);
	}

	if (status >= 0) {
		status = mars_rename(tmp, newpath);
	}
	if (status < 0) {
		/* do not leave the temporary link behind */
		(void)sys_unlink(tmp);
	}

cleanup:
	set_fs(oldfs);
	kfree(tmp);
done:
	return status;
}
EXPORT_SYMBOL_GPL(mars_symlink);
/*
 * Rename @oldpath to @newpath (both kernel strings).
 * Returns the result of sys_rename().
 */
int mars_rename(const char *oldpath, const char *newpath)
{
	mm_segment_t saved_fs = get_fs();
	int res;

	set_fs(get_ds());
	res = sys_rename(oldpath, newpath);
	set_fs(saved_fs);

	return res;
}
EXPORT_SYMBOL_GPL(mars_rename);
/*
 * Change the permission bits of @path (kernel string) to @mode.
 * Returns the result of sys_chmod().
 */
int mars_chmod(const char *path, mode_t mode)
{
	mm_segment_t saved_fs = get_fs();
	int res;

	set_fs(get_ds());
	res = sys_chmod(path, mode);
	set_fs(saved_fs);

	return res;
}
EXPORT_SYMBOL_GPL(mars_chmod);
/*
 * Change the owner of @path (kernel string, symlinks are not followed)
 * to @uid; the group is always set to 0.
 * Returns the result of sys_lchown().
 */
int mars_lchown(const char *path, uid_t uid)
{
	mm_segment_t saved_fs = get_fs();
	int res;

	set_fs(get_ds());
	res = sys_lchown(path, uid, 0);
	set_fs(saved_fs);

	return res;
}
EXPORT_SYMBOL_GPL(mars_lchown);
#include <linux/crypto.h>

/* Hash transform used by mars_digest(), and the size of its result in bytes. */
struct crypto_hash *mars_tfm = NULL;
int mars_digest_size = 0;
EXPORT_SYMBOL_GPL(mars_digest_size);

/*
 * Compute the digest of @len bytes at @data using mars_tfm.
 *
 * @digest: output buffer of at least mars_digest_size bytes; it is zeroed
 *          before the hash is computed.
 *
 * Return values of the crypto calls are not checked.
 * NOTE(review): mars_tfm must have been set up elsewhere before the first
 * call -- not visible from this part of the file.
 */
void mars_digest(void *digest, void *data, int len)
{
	struct scatterlist sg;
	struct hash_desc desc = {
		.tfm = mars_tfm,
		.flags = 0,
	};

	memset(digest, 0, mars_digest_size);

	crypto_hash_init(&desc);

	/* describe the input as a single scatterlist entry */
	sg_init_table(&sg, 1);
	sg_set_buf(&sg, data, len);

	crypto_hash_update(&desc, &sg, sg.length);
	crypto_hash_final(&desc, digest);
}
EXPORT_SYMBOL_GPL(mars_digest);
//////////////////////////////////////////////////////////////
// object stuff
// Object type descriptor for mref objects: type name, default object size
// and the object slot number BRICK_OBJ_MREF.
const struct generic_object_type mref_type = {
.object_type_name = "mref",
.default_size = sizeof(struct mref_object),
.brick_obj_nr = BRICK_OBJ_MREF,
};
EXPORT_SYMBOL_GPL(mref_type);
//////////////////////////////////////////////////////////////
// brick stuff
//////////////////////////////////////////////////////////////
// infrastructure
/* Cached copy of the node name, filled in by the first call to my_id(). */
static char *id = NULL;

/* TODO: better use MAC addresses (or motherboard IDs where available).
 * Or, at least, some checks for MAC addresses should be recorded / added.
 * When the nodename is misconfigured, data might be scrambled.
 * MAC addresses should be more secure.
 * In the ideal case, further checks should be added to prohibit accidental
 * name clashes.
 */

/*
 * Return the identity of this host, which is a copy of the utsname
 * nodename. The copy is made on first use and then reused.
 * Returns NULL when the name is not available or cannot be duplicated;
 * in that case the next call tries again.
 */
char *my_id(void)
{
	if (!id) {
		struct new_utsname *uts;

		/* FIXME: uts_sem should be held for reading around this, but
		 * it is currently not EXPORTed from the kernel.
		 */
		uts = utsname();
		if (uts)
			id = kstrdup(uts->nodename, GFP_MARS);
	}
	return id;
}
EXPORT_SYMBOL_GPL(my_id);
/* The global MARS instance; NULL as long as none has been registered. */
struct mars_global *mars_global = NULL;
EXPORT_SYMBOL_GPL(mars_global);

/*
 * Flag the main event of the global instance as triggered and wake up
 * whoever waits on it. Does nothing when no global instance exists.
 */
void _mars_trigger(void)
{
	if (!mars_global)
		return;

	mars_global->main_trigger = true;
	wake_up_interruptible(&mars_global->main_event);
}
EXPORT_SYMBOL_GPL(_mars_trigger);
/*
 * Set the power button of a single brick.
 *
 * @val:       requested button state.
 * @force_off: together with @val == false, latches power.force_off.
 *             Once that latch is set, every request is turned into "off".
 *
 * When the effective state differs from the current one, the button is
 * set, the brick's brick_switch operation is invoked (if the brick has
 * ops) and the main thread is triggered.
 * Returns the status of brick_switch, or 0 when nothing had to be done.
 */
int mars_power_button(struct mars_brick *brick, bool val, bool force_off)
{
	bool before = brick->power.button;
	int status = 0;

	if (force_off && !val)
		brick->power.force_off = true;

	if (brick->power.force_off)
		val = false;

	if (val == before)
		return status;

	MARS_DBG("brick '%s' type '%s' power button %d -> %d\n", brick->brick_path, brick->type->type_name, before, val);

	set_button(&brick->power, val, false);
	if (brick->ops)
		status = brick->ops->brick_switch(brick);
	mars_trigger();

	return status;
}
EXPORT_SYMBOL_GPL(mars_power_button);
/*
 * Set the power button of a brick via set_recursive_button().
 *
 * @val:       requested button state.
 * @force_off: together with @val == false, latches power.force_off; once
 *             latched, every request is turned into "off". For switching
 *             off it also selects BR_FREE_ALL in place of BR_OFF_ALL.
 * @timeout:   passed through to set_recursive_button().
 *
 * Returns the status of set_recursive_button(), or 0 when the effective
 * state already equals the current button state.
 */
int mars_power_button_recursive(struct mars_brick *brick, bool val, bool force_off, int timeout)
{
	bool before = brick->power.button;
	brick_switch_t mode;

	if (force_off && !val)
		brick->power.force_off = true;

	if (brick->power.force_off)
		val = false;

	if (val == before)
		return 0;

	if (val)
		mode = BR_ON_ALL;
	else if (force_off)
		mode = BR_FREE_ALL;
	else
		mode = BR_OFF_ALL;

	MARS_DBG("brick '%s' type '%s' power button %d -> %d (mode = %d)\n", brick->brick_path, brick->type->type_name, before, val, mode);

	return set_recursive_button((void*)brick, mode, timeout);
}
EXPORT_SYMBOL_GPL(mars_power_button_recursive);
/*
 * Update the "on" LED of a brick. When the value changes, the new state
 * is stored via set_led_on() and the main thread is triggered.
 */
void mars_power_led_on(struct mars_brick *brick, bool val)
{
	bool before = brick->power.led_on;

	if (val == before)
		return;

	MARS_DBG("brick '%s' type '%s' led_on %d -> %d\n", brick->brick_path, brick->type->type_name, before, val);
	set_led_on(&brick->power, val);
	mars_trigger();
}
EXPORT_SYMBOL_GPL(mars_power_led_on);
/*
 * Update the "off" LED of a brick. When the value changes, the new state
 * is stored via set_led_off() and the main thread is triggered.
 */
void mars_power_led_off(struct mars_brick *brick, bool val)
{
	bool before = brick->power.led_off;

	if (val == before)
		return;

	MARS_DBG("brick '%s' type '%s' led_off %d -> %d\n", brick->brick_path, brick->type->type_name, before, val);
	set_led_off(&brick->power, val);
	mars_trigger();
}
EXPORT_SYMBOL_GPL(mars_power_led_off);
/////////////////////////////////////////////////////////////////////
// strategy layer
// Context handed through vfs_readdir() to mars_filler() while one
// directory is being scanned.
struct mars_cookie {
// instance whose dent_anchor list receives the entries
struct mars_global *global;
// callback classifying each entry; a negative class skips the entry
mars_dent_checker checker;
// path of the directory being read (not owned by the cookie)
char *path;
// dent of that directory, stored as d_parent of the entries
// (left unset for the top-level scan)
struct mars_dent *parent;
// length of path, without terminator
int pathlen;
// number of bytes to allocate for each new dent
int allocsize;
// nesting depth stored as d_depth of the entries
int depth;
};
/*
 * Refresh the inode information of @dent from the filesystem object at
 * @newpath (kernel string).
 *
 * The previous new_stat is moved to old_stat and new_stat receives the
 * fresh lstat result. For symlinks the target is read as well: when it
 * differs from dent->new_link, the old new_link becomes old_link and the
 * fresh target becomes new_link.
 *
 * Returns a negative error code on failure. On success the result is 0
 * for non-symlinks and the (non-negative) byte count reported by
 * readlink for symlinks.
 */
static
int get_inode(char *newpath, struct mars_dent *dent)
{
	mm_segment_t oldfs;
	int status;
	struct kstat tmp = {};

	oldfs = get_fs();
	set_fs(get_ds());

	status = vfs_lstat(newpath, &tmp);
	if (status < 0) {
		MARS_ERR("cannot stat '%s', status = %d\n", newpath, status);
		goto done;
	}

	memcpy(&dent->old_stat, &dent->new_stat, sizeof(dent->old_stat));
	memcpy(&dent->new_stat, &tmp, sizeof(dent->new_stat));

	if (S_ISLNK(dent->new_stat.mode)) {
		struct path path = {};
		int len = dent->new_stat.size;
		struct inode *inode;
		char *link;

		if (unlikely(len <= 0)) {
			MARS_ERR("symlink '%s' bad len = %d\n", newpath, len);
			status = -EINVAL;
			goto done;
		}

		/* flags == 0: resolve to the symlink itself, do not follow it */
		status = user_path_at(AT_FDCWD, newpath, 0, &path);
		if (unlikely(status < 0)) {
			MARS_ERR("cannot read link '%s'\n", newpath);
			goto done;
		}

		inode = path.dentry->d_inode;

		status = -ENOMEM;
		link = kmalloc(len + 2, GFP_MARS);
		if (likely(link)) {
			MARS_IO("len = %d\n", len);
			status = inode->i_op->readlink(path.dentry, link, len + 1);
			if (status < 0) {
				kfree(link);
			} else {
				/* Terminate at what was really read: the link may
				 * have changed since lstat, and a shorter target
				 * would otherwise be followed by uninitialized bytes.
				 */
				link[status < len ? status : len] = '\0';

				/* Compare complete strings. A length-limited compare
				 * would treat a longer old target with the same
				 * prefix as "unchanged".
				 */
				if (dent->new_link && !strcmp(dent->new_link, link)) {
					kfree(link);
				} else {
					MARS_IO("symlink '%s' -> '%s' (%s) status = %d\n", newpath, link, dent->new_link ? dent->new_link : "", status);
					if (dent->old_link)
						kfree(dent->old_link);
					dent->old_link = dent->new_link;
					dent->new_link = link;
				}
			}
		}
		path_put(&path);
	}

	if (dent->new_link)
		MARS_IO("symlink '%s'\n", dent->new_link);

done:
	set_fs(oldfs);
	return status;
}
// Directory callback for vfs_readdir(): registers one directory entry
// in the flat dent list of cookie->global.
//
// __buf is the struct mars_cookie of the running scan; name / namlen
// describe the entry (name is not assumed to be NUL-terminated).
// Entries starting with '.' and entries rejected by the checker
// (negative class) are skipped. An entry whose full path is already in
// the list is only refreshed; otherwise a new dent of cookie->allocsize
// bytes is allocated and inserted.
//
// Returns 0 to continue the scan, or -ENOMEM on allocation failure.
static
int mars_filler(void *__buf, const char *name, int namlen, loff_t offset,
u64 ino, unsigned int d_type)
{
struct mars_cookie *cookie = __buf;
struct mars_global *global = cookie->global;
struct list_head *anchor = &global->dent_anchor;
struct mars_dent *dent;
struct list_head *tmp;
struct mars_dent *best = NULL;
char *newpath;
int prefix = 0;
int pathlen;
int class;
int serial = 0;
MARS_IO("ino = %llu len = %d offset = %lld type = %u\n", ino, namlen, offset, d_type);
// ignore ".", ".." and all hidden entries
if (name[0] == '.') {
return 0;
}
// let the checker classify the entry; it may also set prefix and serial
class = cookie->checker(cookie->parent, name, namlen, d_type, &prefix, &serial);
if (class < 0)
return 0;
// build "<directory path>/<name>"
pathlen = cookie->pathlen;
newpath = kmalloc(pathlen + namlen + 2, GFP_MARS);
if (unlikely(!newpath))
goto err_mem0;
memcpy(newpath, cookie->path, pathlen);
newpath[pathlen++] = '/';
memcpy(newpath + pathlen, name, namlen);
pathlen += namlen;
newpath[pathlen] = '\0';
MARS_IO("path = '%s'\n", newpath);
// look for an existing dent with the same path, and at the same time
// determine the insert position for a new one
for (tmp = anchor->next; tmp != anchor; tmp = tmp->next) {
int cmp;
dent = container_of(tmp, struct mars_dent, dent_link);
cmp = strcmp(dent->d_path, newpath);
if (!cmp) {
goto found;
}
// keep the list sorted by (class, serial, path): among all members
// ordered before the new entry, remember the greatest one.
if ((dent->d_class < class ||
(dent->d_class == class &&
(dent->d_serial < serial ||
(dent->d_serial == serial &&
cmp < 0))))
&&
(!best ||
best->d_class < dent->d_class ||
(best->d_class == dent->d_class &&
(best->d_serial < dent->d_serial ||
(best->d_serial == dent->d_serial &&
strcmp(best->d_path, dent->d_path) < 0))))) {
best = dent;
}
}
// not present yet: create a new dent
dent = kzalloc(cookie->allocsize, GFP_MARS);
if (unlikely(!dent))
goto err_mem1;
dent->d_name = kmalloc(namlen + 1, GFP_MARS);
if (unlikely(!dent->d_name))
goto err_mem2;
memcpy(dent->d_name, name, namlen);
dent->d_name[namlen] = '\0';
dent->d_namelen = namlen;
// d_rest points into d_name, behind the prefix reported by the checker
dent->d_rest = dent->d_name + prefix;
// the dent takes over ownership of newpath
dent->d_path = newpath;
newpath = NULL;
dent->d_pathlen = pathlen;
INIT_LIST_HEAD(&dent->brick_list);
// insert directly behind the predecessor found above.
// NOTE(review): without any predecessor the new dent is appended at the
// tail, although it would then sort before all existing members --
// confirm whether this is intended.
if (best) {
list_add(&dent->dent_link, &best->dent_link);
} else {
list_add_tail(&dent->dent_link, anchor);
}
found:
// (re-)initialize the per-scan attributes, for new and for existing dents
dent->d_type = d_type;
dent->d_class = class;
dent->d_serial = serial;
dent->d_parent = cookie->parent;
dent->d_depth = cookie->depth;
dent->d_global = global;
dent->d_killme = false;
// newpath is still set only when an existing dent was found
if (newpath)
kfree(newpath);
return 0;
err_mem2:
kfree(dent);
err_mem1:
kfree(newpath);
err_mem0:
return -ENOMEM;
}
/*
 * Read the directory cookie->path and feed every entry to mars_filler().
 *
 * Returns the error from filp_open() when the directory cannot be opened,
 * otherwise the last status of vfs_readdir() (which is <= 0).
 */
static int _mars_readdir(struct mars_cookie *cookie)
{
	struct file *dir;
	mm_segment_t saved_fs;
	int status = 0;

	saved_fs = get_fs();
	set_fs(get_ds());
	dir = filp_open(cookie->path, O_DIRECTORY | O_RDONLY, 0);
	set_fs(saved_fs);

	if (unlikely(IS_ERR(dir)))
		return PTR_ERR(dir);

	/* repeat for as long as vfs_readdir() reports a positive status */
	do {
		status = vfs_readdir(dir, mars_filler, cookie);
		MARS_IO("vfs_readdir() status = %d\n", status);
	} while (status > 0);

	filp_close(dir, NULL);
	return status;
}
/*
 * Scan the directory tree below @dirname into the flat dent list of
 * @global and run @worker over all dents.
 *
 * @allocsize: size of each newly allocated dent.
 * @checker:   classifies directory entries (see mars_filler()).
 * @worker:    called as worker(buf, dent, false) for every dent in list
 *             order, then as worker(buf, dent, true) in reverse order.
 *             When NULL, only the top-level directory is scanned.
 * @buf:       opaque argument passed through to @worker.
 * @maxdepth:  directories with d_depth <= maxdepth are descended into.
 *
 * Returns the bitwise OR of all status values encountered, so callers
 * should only rely on its sign / on it being zero.
 *
 * NOTE(review): the initial scan of @dirname modifies the dent list
 * before dent_mutex is taken -- confirm that callers serialize this.
 */
int mars_dent_work(struct mars_global *global, char *dirname, int allocsize, mars_dent_checker checker, mars_dent_worker worker, void *buf, int maxdepth)
{
	static int version = 0;
	struct mars_cookie cookie = {
		.global = global,
		.checker = checker,
		.path = dirname,
		.pathlen = strlen(dirname),
		.allocsize = allocsize,
		.depth = 0,
	};
	struct list_head *tmp;
	struct list_head *next;
	int rounds = 0;
	int status;
	int total_status = 0;
	bool found_dir;

	/* Initialize the flat dent list
	 */
	version++;
	total_status = _mars_readdir(&cookie);
	if (total_status || !worker) {
		goto done;
	}

	down_write(&global->dent_mutex);

restart:
	found_dir = false;

	/* First, get all the inode information in a separate pass
	 * before starting work.
	 * The separate pass is necessary because some dents may
	 * forward-reference other dents, and it would be a pity if
	 * some inodes were not available or were outdated.
	 */
	for (tmp = global->dent_anchor.next; tmp != &global->dent_anchor; tmp = tmp->next) {
		struct mars_dent *dent = container_of(tmp, struct mars_dent, dent_link);

		// treat any member only once during this invocation
		if (dent->d_version == version)
			continue;
		dent->d_version = version;

		msleep(10); // yield

		MARS_IO("reading inode '%s'\n", dent->d_path);
		status = get_inode(dent->d_path, dent);
		total_status |= status;

		// recurse into subdirectories by inserting into the flat list
		if (S_ISDIR(dent->new_stat.mode) && dent->d_depth <= maxdepth) {
			struct mars_cookie sub_cookie = {
				.global = global,
				.checker = checker,
				.path = dent->d_path,
				.pathlen = dent->d_pathlen,
				.allocsize = allocsize,
				.parent = dent,
				.depth = dent->d_depth + 1,
			};
			found_dir = true;
			status = _mars_readdir(&sub_cookie);
			total_status |= status;
			if (status < 0) {
				MARS_ERR("forward: status %d on '%s'\n", status, dent->d_path);
			}
		}
	}

	/* Newly inserted subdirectory members need their own inode pass;
	 * the number of rounds is bounded.
	 */
	if (found_dir && ++rounds < 10) {
		goto restart;
	}

	/* Remove all dents marked for removal.
	 */
	for (tmp = global->dent_anchor.next, next = tmp->next; tmp != &global->dent_anchor; tmp = next, next = next->next) {
		struct mars_dent *dent = container_of(tmp, struct mars_dent, dent_link);
		if (!dent->d_killme)
			continue;
		MARS_DBG("killing dent '%s'\n", dent->d_path);
		list_del_init(tmp);
		//... FIXME memleak
	}

	up_write(&global->dent_mutex);

	/* Forward pass.
	 */
	down_read(&global->dent_mutex);
	for (tmp = global->dent_anchor.next, next = tmp->next; tmp != &global->dent_anchor; tmp = next, next = next->next) {
		struct mars_dent *dent = container_of(tmp, struct mars_dent, dent_link);

		msleep(10); // yield

		MARS_IO("forward treat '%s'\n", dent->d_path);
		status = worker(buf, dent, false);
		total_status |= status;
		/* Report failures. This used to be unreachable behind a
		 * "continue" and was mislabelled as the backward pass.
		 */
		if (status < 0) {
			MARS_ERR("forward: status %d on '%s'\n", status, dent->d_path);
		}
	}

	/* Backward pass.
	 */
	for (tmp = global->dent_anchor.prev; tmp != &global->dent_anchor; tmp = tmp->prev) {
		struct mars_dent *dent = container_of(tmp, struct mars_dent, dent_link);

		msleep(10); // yield

		MARS_IO("backward treat '%s'\n", dent->d_path);
		status = worker(buf, dent, true);
		total_status |= status;
		if (status < 0) {
			MARS_ERR("backwards: status %d on '%s'\n", status, dent->d_path);
		}
	}
	up_read(&global->dent_mutex);

done:
	return total_status;
}
EXPORT_SYMBOL_GPL(mars_dent_work);
/*
 * Look up the dent with exactly the path @path in the dent list of
 * @global. The caller is expected to hold dent_mutex; an error is logged
 * when it is not locked at all, but the search is carried out anyway.
 * Returns the dent, or NULL when there is none.
 */
static
struct mars_dent *_mars_find_dent(struct mars_global *global, const char *path)
{
	struct list_head *head = &global->dent_anchor;
	struct list_head *pos;

	if (!rwsem_is_locked(&global->dent_mutex)) {
		MARS_ERR("dent_mutex not held!\n");
	}

	for (pos = head->next; pos != head; pos = pos->next) {
		struct mars_dent *cand = container_of(pos, struct mars_dent, dent_link);

		if (strcmp(cand->d_path, path) == 0)
			return cand;
	}
	return NULL;
}
//EXPORT_SYMBOL_GPL(_mars_find_dent);
/*
 * Exported lookup of a dent by path.
 * No lock is taken here: dent_mutex has to be held by the caller
 * (_mars_find_dent() complains otherwise).
 */
struct mars_dent *mars_find_dent(struct mars_global *global, const char *path)
{
	return _mars_find_dent(global, path);
}
EXPORT_SYMBOL_GPL(mars_find_dent);
#if 0 // old code! does not work! incorrect locking / races!
void mars_kill_dent(struct mars_dent *dent)
{
struct mars_global *global = dent->d_global;
struct list_head *oldtmp = NULL;
CHECK_PTR(global, done);
down(&global->mutex);
while (!list_empty(&dent->brick_list)) {
struct list_head *tmp = dent->brick_list.next;
struct mars_brick *brick = container_of(tmp, struct mars_brick, dent_brick_link);
// just satisfy "defensive" programming style...
if (unlikely(tmp == oldtmp)) {
MARS_ERR("oops, something is nasty here\n");
list_del_init(tmp);
continue;
}
oldtmp = tmp;
// killing a brick may take a long time...
up(&global->mutex);
mars_kill_brick(brick);
down(&global->mutex);
}
up(&global->mutex);
done: ;
}
#else
void mars_kill_dent(struct mars_dent *dent)
{
dent->d_killme = true;
while (!list_empty(&dent->brick_list)) {
struct list_head *tmp = dent->brick_list.next;
struct mars_brick *brick = container_of(tmp, struct mars_brick, dent_brick_link);
list_del_init(tmp);
// note: locking is now done there....
mars_kill_brick(brick);
}
}
#endif
EXPORT_SYMBOL_GPL(mars_kill_dent);
/*
 * Kill all bricks of @dent and release the dent with all strings it owns
 * (argument vector, args, private data, link targets, name and path).
 * The dent is expected to be unlinked from the dent list already; this
 * is checked via CHECK_HEAD_EMPTY.
 */
void mars_free_dent(struct mars_dent *dent)
{
	int i = 0;

	mars_kill_dent(dent);

	CHECK_HEAD_EMPTY(&dent->dent_link);
	CHECK_HEAD_EMPTY(&dent->brick_list);

	/* kfree() accepts NULL, so unset members need no special casing */
	while (i < MARS_ARGV_MAX)
		kfree(dent->d_argv[i++]);
	kfree(dent->d_args);
	kfree(dent->d_private);
	kfree(dent->old_link);
	kfree(dent->new_link);
	kfree(dent->d_name);
	kfree(dent->d_path);
	kfree(dent);
}
EXPORT_SYMBOL_GPL(mars_free_dent);
// Drop the whole dent list hanging off anchor.
// The variant that frees every dent is disabled because of unresolved
// locking; the active code merely re-initializes the anchor, so the
// dents themselves are deliberately leaked for now.
void mars_free_dent_all(struct list_head *anchor)
{
#if 0 // FIXME: locking
while (!list_empty(anchor)) {
struct mars_dent *dent;
dent = container_of(anchor->prev, struct mars_dent, dent_link);
mars_free_dent(dent);
}
#else // provisionary memleak
// detaches the anchor from the list; the former members stay allocated
list_del_init(anchor);
#endif
}
EXPORT_SYMBOL_GPL(mars_free_dent_all);
/////////////////////////////////////////////////////////////////////
// low-level brick instantiation
/*
 * Find the brick registered under @path in @global.
 *
 * @brick_type: when non-NULL, the brick must be of exactly this type;
 *              a brick with matching path but different type is reported
 *              as an error and NULL is returned.
 *
 * Returns the brick, or NULL when @global / @path is NULL, nothing was
 * found, or the type does not match. brick_mutex is only held during
 * the list walk, not while the result is used.
 */
struct mars_brick *mars_find_brick(struct mars_global *global, const void *brick_type, const char *path)
{
	struct mars_brick *found = NULL;
	struct list_head *pos;

	if (!global || !path)
		return NULL;

	down_read(&global->brick_mutex);
	for (pos = global->brick_anchor.next; pos != &global->brick_anchor; pos = pos->next) {
		struct mars_brick *cand = container_of(pos, struct mars_brick, global_brick_link);

		if (strcmp(cand->brick_path, path) == 0) {
			found = cand;
			break;
		}
	}
	up_read(&global->brick_mutex);

	if (found && brick_type && found->type != brick_type) {
		MARS_ERR("bad brick type\n");
		return NULL;
	}
	return found;
}
EXPORT_SYMBOL_GPL(mars_find_brick);
/*
 * Unregister and destroy a brick.
 *
 * Returns
 *   -EINVAL   when @brick is NULL,
 *   -ETXTBSY  unless both power.force_off and power.led_off are set,
 *   -EEXIST   when any output still has connections,
 *   otherwise the status of generic_brick_exit_full(); on success the
 *   brick's memory is released (unless MEMLEAK is defined) and the main
 *   thread is triggered.
 *
 * Note that the brick is unlinked from the global and the dent list
 * before generic_brick_exit_full() is attempted.
 */
int mars_free_brick(struct mars_brick *brick)
{
	struct mars_global *global;
	int status;
	int i;

	if (!brick) {
		MARS_ERR("bad brick parameter\n");
		return -EINVAL;
	}

	if (!(brick->power.force_off && brick->power.led_off)) {
		MARS_DBG("brick '%s' is not freeable\n", brick->brick_name);
		return -ETXTBSY;
	}

	// first check whether the brick is in use somewhere
	for (i = 0; i < brick->nr_outputs; i++) {
		if (brick->outputs[i]->nr_connected > 0) {
			MARS_DBG("brick '%s' not freeable, output %i is used\n", brick->brick_name, i);
			return -EEXIST;
		}
	}

	MARS_DBG("===> freeing brick name = '%s'\n", brick->brick_name);

	global = brick->global;
	if (global) {
		down_write(&global->brick_mutex);
		list_del_init(&brick->global_brick_link);
		list_del_init(&brick->dent_brick_link);
		up_write(&global->brick_mutex);
	}

	status = generic_brick_exit_full((void*)brick);
	if (status < 0) {
		MARS_ERR("error freeing brick, status = %d\n", status);
		return status;
	}

#ifndef MEMLEAK // TODO: check whether crash remains possible
	/* kfree() accepts NULL */
	kfree(brick->brick_name);
	kfree(brick->brick_path);
	kfree(brick);
#endif
	mars_trigger();

	return status;
}
EXPORT_SYMBOL_GPL(mars_free_brick);
struct mars_brick *mars_make_brick(struct mars_global *global, struct mars_dent *belongs, const void *_brick_type, const char *path, const char *_name)
{
const char *name = kstrdup(_name, GFP_MARS);
const char *names[] = { name };
const struct generic_brick_type *brick_type = _brick_type;
const struct generic_input_type **input_types;
const struct generic_output_type **output_types;
struct mars_brick *res;
int size;
int i;
int status;
if (!name) {
MARS_ERR("cannot allocate space for name\n");
return NULL;
}
size = brick_type->brick_size +
(brick_type->max_inputs + brick_type->max_outputs) * sizeof(void*);
input_types = brick_type->default_input_types;
for (i = 0; i < brick_type->max_inputs; i++) {
const struct generic_input_type *type = *input_types++;
if (unlikely(!type)) {
MARS_ERR("input_type %d is missing\n", i);
goto err_name;
}
if (unlikely(type->input_size <= 0)) {
MARS_ERR("bad input_size at %d\n", i);
goto err_name;
}
size += type->input_size;
}
output_types = brick_type->default_output_types;
for (i = 0; i < brick_type->max_outputs; i++) {
const struct generic_output_type *type = *output_types++;
if (unlikely(!type)) {
MARS_ERR("output_type %d is missing\n", i);
goto err_name;
}
if (unlikely(type->output_size <= 0)) {
MARS_ERR("bad output_size at %d\n", i);
goto err_name;
}
size += type->output_size;
}
res = kzalloc(size, GFP_MARS);
if (!res) {
MARS_ERR("cannot grab %d bytes for brick type '%s'\n", size, brick_type->type_name);
goto err_name;
}
res->global = global;
INIT_LIST_HEAD(&res->dent_brick_link);
res->brick_path = kstrdup(path, GFP_MARS);
if (!res->brick_path) {
MARS_ERR("cannot grab memory for path '%s'\n", path);
goto err_res;
}
status = generic_brick_init_full(res, size, brick_type, NULL, NULL, names);
MARS_DBG("brick '%s' init '%s' '%s' (status=%d)\n", brick_type->type_name, path, name, status);
if (status < 0) {
MARS_ERR("cannot init brick %s\n", brick_type->type_name);
goto err_path;
}
res->free = mars_free_brick;
/* Immediately make it visible, regardless of internal state.
* Switching on / etc must be done separately.
*/
down_write(&global->brick_mutex);
list_add(&res->global_brick_link, &global->brick_anchor);
if (belongs) {
list_add(&res->dent_brick_link, &belongs->brick_list);
}
up_write(&global->brick_mutex);
return res;
err_path:
kfree(res->brick_path);
err_res:
kfree(res);
err_name:
kfree(name);
return NULL;
}
EXPORT_SYMBOL_GPL(mars_make_brick);
// Start the shutdown of a brick by requesting BR_FREE_ALL via
// set_recursive_button(), with a timeout of 10 * HZ.
// Returns the status of that call, or -EINVAL when a pointer check fails.
int mars_kill_brick(struct mars_brick *brick)
{
struct mars_global *global;
int status = -EINVAL;
// NOTE(review): CHECK_PTR is a project macro; presumably it jumps to the
// given label when the pointer is invalid -- confirm in the headers.
CHECK_PTR(brick, done);
// global is only fetched in order to be validated
global = brick->global;
CHECK_PTR(global, done);
MARS_DBG("===> killing brick path = '%s' name = '%s'\n", brick->brick_path, brick->brick_name);
// start shutdown
status = set_recursive_button((void*)brick, BR_FREE_ALL, 10 * HZ);
done:
return status;
}
EXPORT_SYMBOL_GPL(mars_kill_brick);
/////////////////////////////////////////////////////////////////////
// mid-level brick instantiation (identity is based on path strings)
/*
 * Format a path into a freshly allocated string.
 *
 * @args: pointer to the caller's va_list, consumed by the formatting.
 *
 * The result is limited to MARS_PATH_MAX bytes including the terminator
 * (longer results are truncated) and must be released with kfree().
 * Returns NULL when the allocation fails.
 */
char *vpath_make(const char *fmt, va_list *args)
{
	int fmt_len = strlen(fmt);
	char *path;

	path = kmalloc(fmt_len + MARS_PATH_MAX, GFP_MARS);
	if (unlikely(!path))
		return NULL;

	vsnprintf(path, MARS_PATH_MAX, fmt, *args);
	return path;
}
EXPORT_SYMBOL_GPL(vpath_make);
/*
 * printf-style wrapper around vpath_make().
 * Returns a kmalloc'ed string to be released with kfree(), or NULL when
 * the allocation fails.
 */
char *path_make(const char *fmt, ...)
{
	va_list ap;
	char *path;

	va_start(ap, fmt);
	path = vpath_make(fmt, &ap);
	va_end(ap);

	return path;
}
EXPORT_SYMBOL_GPL(path_make);
/*
 * Build a new path from @path in which the text starting at a delimiter
 * is replaced by the formatted string.
 *
 * The cut position is the last '/' in @path (or the start when there is
 * none). When @delim is not '/', the position is advanced from there to
 * the first occurrence of @delim (or to the end of @path).
 *
 * @insert: when true, the remainder of @path behind the delimiter is
 *          appended after the formatted text; when false, the result
 *          ends with the formatted text.
 *
 * Example: backskip_replace("/a/b", '/', true, "/.tmp-") yields "/a/.tmp-b".
 *
 * Returns a kmalloc'ed, always NUL-terminated string to be released with
 * kfree(), or NULL when the allocation fails.
 */
char *backskip_replace(const char *path, char delim, bool insert, const char *fmt, ...)
{
	int path_len = strlen(path);
	int total_len = strlen(fmt) + path_len + MARS_PATH_MAX;
	char *res = kmalloc(total_len, GFP_MARS);

	if (likely(res)) {
		va_list args;
		int pos = path_len;
		int plus;

		while (pos > 0 && path[pos] != '/') {
			pos--;
		}
		if (delim != '/') {
			while (pos < path_len && path[pos] != delim) {
				pos++;
			}
		}
		memcpy(res, path, pos);

		va_start(args, fmt);
		/* vscnprintf() returns the number of bytes really stored, so
		 * the offset computed below can never leave the buffer.
		 */
		plus = vscnprintf(res + pos, total_len - pos, fmt, args);
		va_end(args);

		if (insert) {
			const char *rest = path + pos;

			/* Skip the delimiter itself, but only when there is one:
			 * never step over the terminating NUL, and never drop a
			 * regular character when no delimiter was found.
			 */
			if (pos < path_len && *rest == delim)
				rest++;
			/* bounded copy which always terminates the result */
			snprintf(res + pos + plus, total_len - pos - plus, "%s", rest);
		}
	}
	return res;
}
EXPORT_SYMBOL_GPL(backskip_replace);
/*
 * Find a brick by a printf-style formatted path.
 *
 * @brick_type: passed through to mars_find_brick() (NULL matches any type).
 *
 * Returns the brick, or NULL when the path cannot be built or no
 * matching brick exists.
 */
struct mars_brick *path_find_brick(struct mars_global *global, const void *brick_type, const char *fmt, ...)
{
	va_list args;
	char *fullpath;
	struct mars_brick *res;

	va_start(args, fmt);
	fullpath = vpath_make(fmt, &args);
	va_end(args);

	if (!fullpath) {
		return NULL;
	}
	res = mars_find_brick(global, brick_type, fullpath);
	/* log before releasing the path: the message prints the string */
	MARS_IO("search for '%s' found = %p\n", fullpath, res);
	kfree(fullpath);
	return res;
}
EXPORT_SYMBOL_GPL(path_find_brick);
// Brick types that make_brick_all() may substitute for each other.
// All start out as NULL and are compared against in make_brick_all().
// NOTE(review): presumably filled in by the respective brick modules when
// they are loaded -- not visible from this file section.
const struct generic_brick_type *_client_brick_type = NULL;
EXPORT_SYMBOL_GPL(_client_brick_type);
const struct generic_brick_type *_bio_brick_type = NULL;
EXPORT_SYMBOL_GPL(_bio_brick_type);
const struct generic_brick_type *_aio_brick_type = NULL;
EXPORT_SYMBOL_GPL(_aio_brick_type);
struct mars_brick *make_brick_all(
struct mars_global *global,
struct mars_dent *belongs,
void (*setup_fn)(struct mars_brick *brick, void *private),
void *private,
int timeout,
const char *new_name,
const struct generic_brick_type *new_brick_type,
const struct generic_brick_type *prev_brick_type[],
const char *switch_fmt,
const char *new_fmt,
const char *prev_fmt[],
int prev_count,
...
)
{
va_list args;
const char *switch_path = NULL;
const char *new_path;
const char *_new_path = NULL;
struct mars_brick *brick = NULL;
char *paths[prev_count];
struct mars_brick *prev[prev_count];
int switch_state = true;
int i;
// treat variable arguments
va_start(args, prev_count);
if (switch_fmt) {
switch_state = false;
if (switch_fmt[0]) {
switch_path = vpath_make(switch_fmt, &args);
}
}
if (new_fmt) {
new_path = _new_path = vpath_make(new_fmt, &args);
} else {
new_path = new_name;
}
for (i = 0; i < prev_count; i++) {
paths[i] = vpath_make(prev_fmt[i], &args);
}
va_end(args);
if (!new_path) {
MARS_ERR("could not create new path\n");
goto err;
}
if (switch_path) {
struct mars_dent *test = mars_find_dent(global, switch_path);
if (test && test->new_link) {
sscanf(test->new_link, "%d", &switch_state);
}
}
// brick already existing?
brick = mars_find_brick(global, new_brick_type != _aio_brick_type && new_brick_type != _bio_brick_type ? new_brick_type : NULL, new_path);
if (brick) {
// just switch the power state
MARS_DBG("found existing brick '%s'\n", new_path);
goto do_switch;
}
if (!switch_state) { // don't start => also don't create
MARS_DBG("no need for brick '%s'\n", new_path);
goto done;
}
MARS_DBG("make new brick '%s'\n", new_path);
if (!new_name)
new_name = new_path;
MARS_DBG("----> new brick type = '%s' path = '%s' name = '%s'\n", new_brick_type->type_name, new_path, new_name);
// get all predecessor bricks
for (i = 0; i < prev_count; i++) {
char *path = paths[i];
if (!path) {
MARS_ERR("could not build path %d\n", i);
goto err;
}
prev[i] = mars_find_brick(global, prev_brick_type[i], path);
if (!prev[i]) {
MARS_ERR("prev brick '%s' does not exist\n", path);
goto err;
}
MARS_DBG("------> predecessor %d path = '%s'\n", i, path);
}
// some generic brick replacements (better performance / network functionality)
brick = NULL;
if ((new_brick_type == _bio_brick_type || new_brick_type == _aio_brick_type)
&& _client_brick_type != NULL) {
char *remote = strchr(new_name, '@');
if (remote) {
remote++;
MARS_DBG("substitute by remote brick '%s' on peer '%s'\n", new_name, remote);
brick = mars_make_brick(global, belongs, _client_brick_type, new_path, new_name);
if (brick) {
struct client_brick *_brick = (void*)brick;
_brick->max_flying = 10000;
}
}
}
if (!brick && new_brick_type == _bio_brick_type && _aio_brick_type) {
struct kstat test = {};
int status = mars_stat(new_path, &test, false);
if (SKIP_BIO || status < 0 || !S_ISBLK(test.mode)) {
new_brick_type = _aio_brick_type;
MARS_DBG("substitute bio by aio\n");
}
}
// create it...
if (!brick)
brick = mars_make_brick(global, belongs, new_brick_type, new_path, new_name);
if (unlikely(!brick)) {
MARS_ERR("creation failed '%s' '%s'\n", new_path, new_name);
goto err;
}
if (unlikely(brick->nr_inputs < prev_count)) {
MARS_ERR("wrong number of arguments: %d < %d\n", brick->nr_inputs, prev_count);
goto err;
}
// connect the wires
for (i = 0; i < prev_count; i++) {
int status;
status = generic_connect((void*)brick->inputs[i], (void*)prev[i]->outputs[0]);
if (unlikely(status < 0)) {
MARS_ERR("'%s' '%s' cannot connect input %d\n", new_path, new_name, i);
goto err;
}
}
// call setup function
if (setup_fn) {
setup_fn(brick, private);
}
do_switch:
// switch on/off (may fail silently, but responsibility is at the workers)
if (timeout > 0 || !switch_state) {
int status;
status = mars_power_button_recursive((void*)brick, switch_state, false, timeout);
MARS_DBG("switch %d status = %d\n", switch_state, status);
#if 0 // TODO: need cleanup_fn() here FIXME: interferes with logic needing the switched-off brick!
if (!switch_state && status >= 0 && !brick->power.button && brick->power.led_off) {
mars_kill_brick(brick);
brick = NULL;
}
#endif
}
goto done;
err:
if (brick) {
mars_kill_brick(brick);
}
brick = NULL;
done:
for (i = 0; i < prev_count; i++) {
if (paths[i]) {
kfree(paths[i]);
}
}
if (_new_path)
kfree(_new_path);
if (switch_path)
kfree(switch_path);
return brick;
}
EXPORT_SYMBOL_GPL(make_brick_all);
/////////////////////////////////////////////////////////////////////
// init stuff
#define LIMIT_MEM
#ifdef LIMIT_MEM
#include <linux/swap.h>
#include <linux/mm.h>
#endif
// Global memory limit. Stays 0 unless LIMIT_MEM is defined, in which case
// init_mars() derives it from total_swapcache_pages.
long long mars_global_memlimit = 0;
EXPORT_SYMBOL_GPL(mars_global_memlimit);
// Exported mm_struct pointer, initially NULL. It is not assigned in this
// part of the file (presumably set_fake() / put_fake() manage it -- TODO
// confirm).
struct mm_struct *mm_fake = NULL;
EXPORT_SYMBOL_GPL(mm_fake);
// Module initialisation.
//
// - With LIMIT_MEM: derives mars_global_memlimit from total_swapcache_pages.
// - Sets brick_obj_max to BRICK_OBJ_MAX.
// - Allocates the "md5" hash transform into mars_tfm and stores its digest
//   size in mars_digest_size.
// - Calls set_fake().
// - With MARS_TRACING: opens /mars/trace.csv as mars_log_file; a failure
//   there is only logged and does not abort initialisation.
//
// Returns 0 on success, or a negative errno when the hash transform cannot
// be allocated. In that case mars_tfm is left as NULL.
static int __init init_mars(void)
{
	MARS_INF("init_mars()\n");
#ifdef LIMIT_MEM // provisionary
	mars_global_memlimit = total_swapcache_pages * (PAGE_SIZE / 4);
	MARS_INF("mars_global_memlimit = %lld\n", mars_global_memlimit);
#endif
	brick_obj_max = BRICK_OBJ_MAX;

	mars_tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
	if (!mars_tfm) {
		MARS_ERR("cannot alloc crypto hash\n");
		return -ENOMEM;
	}
	if (IS_ERR(mars_tfm)) {
		// PTR_ERR() yields a long, so it must be printed with %ld.
		long status = PTR_ERR(mars_tfm);

		MARS_ERR("alloc crypto hash failed, status = %ld\n", status);
		// Do not leave an error-encoded pointer behind in the global
		// (same treatment as mars_log_file below).
		mars_tfm = NULL;
		return (int)status;
	}
#if 0
	if (crypto_tfm_alg_type(crypto_hash_tfm(mars_tfm)) != CRYPTO_ALG_TYPE_DIGEST) {
		MARS_ERR("bad crypto hash type\n");
		return -EINVAL;
	}
#endif
	mars_digest_size = crypto_hash_digestsize(mars_tfm);
	MARS_INF("digest_size = %d\n", mars_digest_size);

	set_fake();

#ifdef MARS_TRACING
	{
		int flags = O_CREAT | O_TRUNC | O_RDWR | O_LARGEFILE;
		int prot = 0600;
		mm_segment_t oldfs;

		// filp_open() is called with the address limit temporarily
		// switched to get_ds(), then restored.
		oldfs = get_fs();
		set_fs(get_ds());
		mars_log_file = filp_open("/mars/trace.csv", flags, prot);
		set_fs(oldfs);
		if (IS_ERR(mars_log_file)) {
			// tracing is optional: log the failure and carry on without it
			MARS_ERR("cannot create trace logfile, status = %ld\n", PTR_ERR(mars_log_file));
			mars_log_file = NULL;
		}
	}
#endif
	return 0;
}
// Module teardown: frees the id string, releases the hash transform held in
// mars_tfm (if any), calls put_fake() and, with MARS_TRACING, closes the
// trace log file.
static void __exit exit_mars(void)
{
	MARS_INF("exit_mars()\n");

	// kfree() ignores NULL, so no guard is needed
	kfree(id);
	id = NULL;

	if (mars_tfm) {
		crypto_free_hash(mars_tfm);
	}

	put_fake();

#ifdef MARS_TRACING
	if (mars_log_file) {
		filp_close(mars_log_file, NULL);
		mars_log_file = NULL;
	}
#endif
}
// Module metadata and registration of the init / exit entry points.
MODULE_DESCRIPTION("MARS block storage");
MODULE_AUTHOR("Thomas Schoebel-Theuer <tst@1und1.de>");
MODULE_LICENSE("GPL");
module_init(init_mars);
module_exit(exit_mars);