import mars-56.tgz

This commit is contained in:
Thomas Schoebel-Theuer 2011-02-23 21:48:06 +01:00
parent 048ef32de3
commit d9a6c14a89
26 changed files with 6087 additions and 200 deletions

28
Kconfig
View File

@ -63,9 +63,37 @@ config MARS_TRANS_LOGGER
---help---
Experimental storage System.
config MARS_SERVER
tristate "server brick"
depends on MARS
default m
---help---
Experimental storage System.
config MARS_CLIENT
tristate "client brick"
depends on MARS
default m
---help---
Experimental storage System.
config MARS_COPY
tristate "copy brick"
depends on MARS
default m
---help---
Experimental storage System.
config MARS_TEST
tristate "provisionary TEST"
depends on MARS
default m
---help---
Experimental storage System.
config MARS_LIGHT
tristate "MARS Light"
depends on MARS
default m
---help---
Experimental storage System.

View File

@ -2,7 +2,7 @@
# Makefile for MARS
#
obj-$(CONFIG_MARS) += brick.o mars_generic.o
obj-$(CONFIG_MARS) += brick.o mars_generic.o mars_net.o
obj-$(CONFIG_MARS_DUMMY) += mars_dummy.o
obj-$(CONFIG_MARS_CHECK) += mars_check.o
obj-$(CONFIG_MARS_IF_DEVICE) += mars_if_device.o
@ -11,7 +11,11 @@ obj-$(CONFIG_MARS_DEVICE_SIO) += mars_device_sio.o
obj-$(CONFIG_MARS_BUF) += mars_buf.o
obj-$(CONFIG_MARS_USEBUF) += mars_usebuf.o
obj-$(CONFIG_MARS_TRANS_LOGGER) += mars_trans_logger.o
obj-$(CONFIG_MARS_SERVER) += mars_server.o
obj-$(CONFIG_MARS_CLIENT) += mars_client.o
obj-$(CONFIG_MARS_COPY) += mars_copy.o
obj-$(CONFIG_MARS_TEST) += mars_test.o
obj-$(CONFIG_MARS_LIGHT) += mars_light.o
#mars-objs := mars_generic.o

139
brick.c
View File

@ -3,8 +3,10 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/semaphore.h>
//#define BRICK_DEBUGGING
#define USE_FREELIST
#define _STRATEGY
@ -35,8 +37,8 @@ int generic_register_brick_type(const struct generic_brick_type *new_type)
continue;
}
if (!strcmp(brick_types[i]->type_name, new_type->type_name)) {
BRICK_ERR("sorry, bricktype %s is already registered.\n", new_type->type_name);
return -EEXIST;
BRICK_DBG("bricktype %s is already registered.\n", new_type->type_name);
return 0;
}
}
if (found < 0) {
@ -65,16 +67,21 @@ int generic_brick_init_full(
const struct generic_brick_type *brick_type,
const struct generic_input_type **input_types,
const struct generic_output_type **output_types,
char **names)
const char **names)
{
struct generic_brick *brick = data;
int status;
int i;
BRICK_DBG("generic_brick_init_full()\n");
// first, call the generic constructors
BRICK_DBG("brick_type = %s\n", brick_type->type_name);
if (unlikely(!data)) {
BRICK_ERR("invalid memory\n");
return -EINVAL;
}
status = generic_brick_init(brick_type, brick, *names++);
// call the generic constructors
status = generic_brick_init(brick_type, brick, names ? *names++ : NULL);
if (status)
return status;
data += brick_type->brick_size;
@ -100,7 +107,7 @@ int generic_brick_init_full(
struct generic_input *input = data;
const struct generic_input_type *type = *input_types++;
BRICK_DBG("generic_brick_init_full: calling generic_input_init()\n");
status = generic_input_init(brick, i, type, input, names ? *names++ : type->type_name);
status = generic_input_init(brick, i, type, input, (names && *names) ? *names++ : type->type_name);
if (status)
return status;
data += type->input_size;
@ -125,7 +132,7 @@ int generic_brick_init_full(
struct generic_output *output = data;
const struct generic_output_type *type = *output_types++;
BRICK_DBG("generic_brick_init_full: calling generic_output_init()\n");
generic_output_init(brick, i, type, output, names ? *names++ : type->type_name);
generic_output_init(brick, i, type, output, (names && *names) ? *names++ : type->type_name);
if (status)
return status;
data += type->output_size;
@ -532,4 +539,120 @@ void free_generic(struct generic_object *object)
}
EXPORT_SYMBOL_GPL(free_generic);
/////////////////////////////////////////////////////////////////
// helper stuff
struct semaphore lamport_sem = __SEMAPHORE_INITIALIZER(lamport_sem, 1); // TODO: replace with spinlock if possible (first check)
struct timespec lamport_now = {};
/*
 * Produce a timestamp in *now that is strictly later than any timestamp
 * previously handed out by get_lamport() or fed in via set_lamport().
 *
 * If the current time is ahead of the stored stamp, it is adopted as the
 * new stamp.  Otherwise the stored stamp is advanced by one nanosecond
 * and returned instead.  All accesses are serialized by lamport_sem.
 */
void get_lamport(struct timespec *now)
{
	down(&lamport_sem);

	*now = CURRENT_TIME;
	if (timespec_compare(now, &lamport_now) > 0) {
		/* real time has moved on: remember it */
		lamport_now = *now;
	} else {
		/* real time did not advance: tick the stored stamp */
		timespec_add_ns(&lamport_now, 1);
		*now = lamport_now;
	}

	up(&lamport_sem);
}
EXPORT_SYMBOL_GPL(get_lamport);
void set_lamport(struct timespec *old)
{
int diff;
down(&lamport_sem);
diff = timespec_compare(old, &lamport_now);
if (diff > 0) {
memcpy(&lamport_now, old, sizeof(lamport_now));
}
up(&lamport_sem);
}
EXPORT_SYMBOL_GPL(set_lamport);
/*
 * Set sw->button to val.  Only an actual change of value raises
 * sw->trigger and wakes up waiters on sw->event.
 */
void set_button(struct generic_switch *sw, bool val)
{
	if (sw->button == val)
		return;

	sw->button = val;
	sw->trigger = true;
	wake_up_interruptible(&sw->event);
}
EXPORT_SYMBOL_GPL(set_button);
/*
 * Set sw->led_on to val.  Only an actual change of value raises
 * sw->trigger and wakes up waiters on sw->event.
 */
void set_led_on(struct generic_switch *sw, bool val)
{
	if (sw->led_on == val)
		return;

	sw->led_on = val;
	sw->trigger = true;
	wake_up_interruptible(&sw->event);
}
EXPORT_SYMBOL_GPL(set_led_on);
/*
 * Set sw->led_off to val.  Only an actual change of value raises
 * sw->trigger and wakes up waiters on sw->event.
 */
void set_led_off(struct generic_switch *sw, bool val)
{
	if (sw->led_off == val)
		return;

	sw->led_off = val;
	sw->trigger = true;
	wake_up_interruptible(&sw->event);
}
EXPORT_SYMBOL_GPL(set_led_off);
/////////////////////////////////////////////////////////////////
// meta stuff
/*
 * Look up the description of field_name in a meta table.
 *
 * The table is scanned from its first entry up to (not including) the
 * first entry whose field_name is empty.  At most MAX_FIELD_LEN
 * characters of the names are compared.
 *
 * Returns the matching entry, or NULL when no entry matches.
 */
const struct meta *find_meta(const struct meta *meta, const char *field_name)
{
	const struct meta *entry = meta;

	while (entry->field_name[0]) {
		if (strncmp(field_name, entry->field_name, MAX_FIELD_LEN) == 0)
			return entry;
		entry++;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(find_meta);
/*
 * Release all dynamically allocated members of the structure at 'data',
 * as described by the meta table 'meta'.  The structure itself is NOT
 * freed; that stays the caller's job.
 *
 *  - FIELD_SUB:    the member is embedded in 'data'; its own members are
 *                  released recursively via meta->field_ref.
 *  - FIELD_REF /
 *    FIELD_STRING: the member is a pointer; the pointed-to object is
 *                  first cleaned recursively (when a field_ref table is
 *                  given) and then kfree()d.
 *  - other types:  nothing to release.
 *
 * A NULL 'data' or 'meta' is tolerated and treated as "nothing to do".
 * This matters for the recursion: a FIELD_REF member that was never
 * filled in is NULL, and descending into it would dereference
 * NULL + field_offset.
 */
void free_meta(void *data, const struct meta *meta)
{
	if (!data || !meta)
		return;

	for (; meta->field_name[0]; meta++) {
		void *item;

		switch (meta->field_type) {
		case FIELD_SUB:
			if (meta->field_ref) {
				item = data + meta->field_offset;
				free_meta(item, meta->field_ref);
			}
			break;
		case FIELD_REF:
		case FIELD_STRING:
			item = *(void **)(data + meta->field_offset);
			/* only descend into objects that really exist */
			if (item && meta->field_ref)
				free_meta(item, meta->field_ref);
			kfree(item); /* kfree(NULL) is a no-op */
			break;
		default:
			break;
		}
	}
}
EXPORT_SYMBOL_GPL(free_meta);
MODULE_LICENSE("GPL");

175
brick.h
View File

@ -2,8 +2,10 @@
#ifndef BRICK_H
#define BRICK_H
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/wait.h>
#ifdef _STRATEGY
#define _STRATEGY_CODE(X) X
@ -13,14 +15,16 @@
#define _NORMAL_CODE(X) X
#endif
#define BRICK_ERROR "BRICK_ERROR " __BASE_FILE__ ": "
#define BRICK_INFO "BRICK_INFO " __BASE_FILE__ ": "
#define BRICK_DEBUG "BRICK_DEBUG " __BASE_FILE__ ": "
#define BRICK_ERROR "BRICK_ERROR "
#define BRICK_INFO "BRICK_INFO "
#define BRICK_DEBUG "BRICK_DEBUG "
//#define _BRICK_FMT(fmt) "[%s] " __BASE_FILE__ " %d %s(): " fmt, current->comm, __LINE__, __FUNCTION__
#define _BRICK_FMT(fmt) __BASE_FILE__ " %d %s(): " fmt, __LINE__, __FUNCTION__
#define BRICK_ERR(fmt, args...) printk(BRICK_ERROR "%s(): " fmt, __FUNCTION__, ##args)
#define BRICK_INF(fmt, args...) printk(BRICK_INFO "%s(): " fmt, __FUNCTION__, ##args)
#define BRICK_ERR(fmt, args...) printk(BRICK_ERROR _BRICK_FMT(fmt), ##args)
#define BRICK_INF(fmt, args...) printk(BRICK_INFO _BRICK_FMT(fmt), ##args)
#ifdef BRICK_DEBUGGING
#define BRICK_DBG(fmt, args...) printk(BRICK_DEBUG "%s(): " fmt, __FUNCTION__, ##args)
#define BRICK_DBG(fmt, args...) printk(BRICK_DEBUG _BRICK_FMT(fmt), ##args)
#else
#define BRICK_DBG(args...) /**/
#endif
@ -120,14 +124,23 @@ struct generic_brick_ops;
struct generic_output_ops;
struct generic_brick_type;
/* Switch state with change notification.
 *
 * set_button(), set_led_on() and set_led_off() (brick.c) each update the
 * field of the same name; when the value actually changes they also set
 * 'trigger' and wake up sleepers on 'event'.
 * generic_brick_init() starts a brick's switch with led_off = true and
 * initializes 'event'.
 *
 * NOTE(review): presumably 'button' is the requested state and
 * led_on / led_off report the state actually reached -- confirm against
 * the bricks that implement brick_switch().
 */
struct generic_switch {
bool button;
bool led_on;
bool led_off;
bool trigger;
wait_queue_head_t event;
};
#define GENERIC_BRICK(BRICK) \
char *brick_name; \
const char *brick_name; \
const struct BRICK##_brick_type *type; \
struct BRICK##_brick_ops *ops; \
int nr_inputs; \
int nr_outputs; \
struct BRICK##_input **inputs; \
struct BRICK##_output **outputs; \
struct generic_switch power; \
struct list_head tmp_head; \
struct generic_brick {
@ -135,7 +148,7 @@ struct generic_brick {
};
#define GENERIC_INPUT(BRICK) \
char *input_name; \
const char *input_name; \
struct BRICK##_brick *brick; \
const struct BRICK##_input_type *type; \
struct BRICK##_output *connect; \
@ -146,7 +159,7 @@ struct generic_input {
};
#define GENERIC_OUTPUT(BRICK) \
char *output_name; \
const char *output_name; \
struct BRICK##_brick *brick; \
const struct BRICK##_output_type *type; \
struct BRICK##_output_ops *ops; \
@ -176,7 +189,7 @@ struct generic_output {
)
#define GENERIC_BRICK_OPS(BRICK) \
int (*brick_switch)(struct BRICK##_brick *brick, bool state); \
int (*brick_switch)(struct BRICK##_brick *brick); \
struct generic_brick_ops {
GENERIC_BRICK_OPS(generic);
@ -193,14 +206,14 @@ struct generic_output_ops {
// although possible, *_type should never be extended
#define GENERIC_BRICK_TYPE(BRICK) \
char *type_name; \
const char *type_name; \
int brick_size; \
int max_inputs; \
int max_outputs; \
const struct BRICK##_input_type **default_input_types; \
char **default_input_names; \
const char **default_input_names; \
const struct BRICK##_output_type **default_output_types; \
char **default_output_names; \
const char **default_output_names; \
struct BRICK##_brick_ops *master_ops; \
const struct BRICK##_input_types **default_type; \
int (*brick_construct)(struct BRICK##_brick *brick); \
@ -242,7 +255,7 @@ struct generic_output_type {
int generic_register_brick_type(const struct generic_brick_type *new_type);
int generic_unregister_brick_type(const struct generic_brick_type *old_type);
extern inline void _generic_output_init(struct generic_brick *brick, const struct generic_output_type *type, struct generic_output *output, char *output_name)
inline void _generic_output_init(struct generic_brick *brick, const struct generic_output_type *type, struct generic_output *output, const char *output_name)
{
output->output_name = output_name;
output->brick = brick;
@ -254,18 +267,22 @@ extern inline void _generic_output_init(struct generic_brick *brick, const struc
#ifdef _STRATEGY // call this only in strategy bricks, never in ordinary bricks
// you need this only if you circumvent generic_brick_init_full()
extern inline int generic_brick_init(const struct generic_brick_type *type, struct generic_brick *brick, char *brick_name)
inline int generic_brick_init(const struct generic_brick_type *type, struct generic_brick *brick, const char *brick_name)
{
brick->brick_name = brick_name;
brick->type = type;
brick->ops = type->master_ops;
brick->nr_inputs = 0;
brick->nr_outputs = 0;
brick->power.led_off = true;
//brick->power.event = __WAIT_QUEUE_HEAD_INITIALIZER(brick->power.event);
init_waitqueue_head(&brick->power.event);
//INIT_LIST_HEAD(&brick->tmp_head);
brick->tmp_head.next = brick->tmp_head.prev = &brick->tmp_head;
return 0;
}
extern inline int generic_input_init(struct generic_brick *brick, int index, const struct generic_input_type *type, struct generic_input *input, char *input_name)
inline int generic_input_init(struct generic_brick *brick, int index, const struct generic_input_type *type, struct generic_input *input, const char *input_name)
{
if (index < 0 || index >= brick->type->max_inputs)
return -ENOMEM;
@ -280,7 +297,7 @@ extern inline int generic_input_init(struct generic_brick *brick, int index, con
return 0;
}
extern inline int generic_output_init(struct generic_brick *brick, int index, const struct generic_output_type *type, struct generic_output *output, char *output_name)
inline int generic_output_init(struct generic_brick *brick, int index, const struct generic_output_type *type, struct generic_output *output, const char *output_name)
{
if (index < 0 || index >= brick->type->max_outputs)
return -ENOMEM;
@ -292,7 +309,7 @@ extern inline int generic_output_init(struct generic_brick *brick, int index, co
return 0;
}
extern inline int generic_size(const struct generic_brick_type *brick_type)
inline int generic_size(const struct generic_brick_type *brick_type)
{
int size = brick_type->brick_size;
int i;
@ -316,12 +333,12 @@ int generic_brick_init_full(
const struct generic_brick_type *brick_type,
const struct generic_input_type **input_types,
const struct generic_output_type **output_types,
char **names);
const char **names);
int generic_brick_exit_full(
struct generic_brick *brick);
extern inline int generic_connect(struct generic_input *input, struct generic_output *output)
inline int generic_connect(struct generic_input *input, struct generic_output *output)
{
BRICK_DBG("generic_connect(input=%p, output=%p)\n", input, output);
if (!input || !output)
@ -334,7 +351,7 @@ extern inline int generic_connect(struct generic_input *input, struct generic_ou
return 0;
}
extern inline int generic_disconnect(struct generic_input *input)
inline int generic_disconnect(struct generic_input *input)
{
BRICK_DBG("generic_disconnect(input=%p)\n", input);
if (!input)
@ -426,14 +443,14 @@ static inline int BRICK##_output_init(struct BRICK##_brick *brick, int index, st
\
_STRATEGY_CODE( \
\
extern inline int INPUT_BRICK##_##OUTPUT_BRICK##_connect( \
inline int INPUT_BRICK##_##OUTPUT_BRICK##_connect( \
struct INPUT_BRICK##_input *input, \
struct OUTPUT_BRICK##_output *output) \
{ \
return generic_connect((struct generic_input*)input, (struct generic_output*)output); \
} \
\
extern inline int INPUT_BRICK##_##OUTPUT_BRICK####_disconnect( \
inline int INPUT_BRICK##_##OUTPUT_BRICK####_disconnect( \
struct INPUT_BRICK##_input *input) \
{ \
return generic_disconnect((struct generic_input*)input); \
@ -456,7 +473,7 @@ extern void free_generic(struct generic_object *object);
#define GENERIC_OBJECT_LAYOUT_FUNCTIONS(BRICK) \
\
extern inline int BRICK##_init_object_layout(struct BRICK##_output *output, struct generic_object_layout *object_layout, int aspect_max, const struct generic_object_type *object_type) \
inline int BRICK##_init_object_layout(struct BRICK##_output *output, struct generic_object_layout *object_layout, int aspect_max, const struct generic_object_type *object_type) \
{ \
if (likely(object_layout->object_type)) \
return 0; \
@ -465,7 +482,7 @@ extern inline int BRICK##_init_object_layout(struct BRICK##_output *output, stru
#define GENERIC_ASPECT_LAYOUT_FUNCTIONS(BRICK,TYPE) \
\
extern inline int BRICK##_##TYPE##_add_aspect(struct BRICK##_output *output, struct TYPE##_object_layout *object_layout, const struct generic_aspect_type *aspect_type) \
inline int BRICK##_##TYPE##_add_aspect(struct BRICK##_output *output, struct TYPE##_object_layout *object_layout, const struct generic_aspect_type *aspect_type) \
{ \
int res = generic_add_aspect((struct generic_output*)output, (struct generic_object_layout *)object_layout, aspect_type); \
BRICK_DBG(#BRICK " " #TYPE "added aspect_type %p (%s) to object_layout %p (type %s) on output %p (type %s), status=%d\n", aspect_type, aspect_type->aspect_type_name, object_layout, object_layout->object_type->object_type_name, output, output->type->type_name, res); \
@ -474,7 +491,7 @@ extern inline int BRICK##_##TYPE##_add_aspect(struct BRICK##_output *output, str
#define GENERIC_OBJECT_FUNCTIONS(TYPE) \
\
extern inline struct TYPE##_object *TYPE##_construct(void *data, struct TYPE##_object_layout *object_layout) \
inline struct TYPE##_object *TYPE##_construct(void *data, struct TYPE##_object_layout *object_layout) \
{ \
struct TYPE##_object *obj = data; \
int i; \
@ -504,7 +521,7 @@ extern inline struct TYPE##_object *TYPE##_construct(void *data, struct TYPE##_o
return obj; \
} \
\
extern inline void TYPE##_destruct(struct TYPE##_object *obj) \
inline void TYPE##_destruct(struct TYPE##_object *obj) \
{ \
struct TYPE##_object_layout *object_layout = obj->object_layout; \
int i; \
@ -532,7 +549,7 @@ extern inline void TYPE##_destruct(struct TYPE##_object *obj) \
#define GENERIC_ASPECT_FUNCTIONS(BRICK,TYPE) \
\
extern inline struct BRICK##_##TYPE##_aspect *BRICK##_##TYPE##_get_aspect(struct BRICK##_output *output, struct TYPE##_object *obj) \
inline struct BRICK##_##TYPE##_aspect *BRICK##_##TYPE##_get_aspect(struct BRICK##_output *output, struct TYPE##_object *obj) \
{ \
struct generic_object_layout *object_layout; \
struct generic_aspect_layout *aspect_layout; \
@ -548,12 +565,12 @@ extern inline struct BRICK##_##TYPE##_aspect *BRICK##_##TYPE##_get_aspect(struct
return (void*)obj + aspect_layout->aspect_offset; \
} \
\
extern inline int BRICK##_##TYPE##_init_object_layout(struct BRICK##_output *output, struct generic_object_layout *object_layout) \
inline int BRICK##_##TYPE##_init_object_layout(struct BRICK##_output *output, struct generic_object_layout *object_layout) \
{ \
return BRICK##_init_object_layout(output, object_layout, 32, &TYPE##_type); \
} \
\
extern inline struct TYPE##_object *BRICK##_alloc_##TYPE(struct BRICK##_output *output, struct generic_object_layout *object_layout) \
inline struct TYPE##_object *BRICK##_alloc_##TYPE(struct BRICK##_output *output, struct generic_object_layout *object_layout) \
{ \
int status = BRICK##_##TYPE##_init_object_layout(output, object_layout); \
if (status < 0) \
@ -561,12 +578,12 @@ extern inline struct TYPE##_object *BRICK##_alloc_##TYPE(struct BRICK##_output *
return (struct TYPE##_object*)alloc_generic(object_layout); \
} \
\
extern inline struct TYPE##_object *BRICK##_alloc_##TYPE##_pure(struct generic_object_layout *object_layout) \
inline struct TYPE##_object *BRICK##_alloc_##TYPE##_pure(struct generic_object_layout *object_layout) \
{ \
return (struct TYPE##_object*)alloc_generic(object_layout); \
} \
\
extern inline void BRICK##_free_##TYPE(struct TYPE##_object *object) \
inline void BRICK##_free_##TYPE(struct TYPE##_object *object) \
{ \
free_generic((struct generic_object*)object); \
} \
@ -577,12 +594,19 @@ GENERIC_OBJECT_FUNCTIONS(generic);
///////////////////////////////////////////////////////////////////////
// some helpers
// some general helpers
extern void get_lamport(struct timespec *now);
extern void set_lamport(struct timespec *old);
#if 0
#undef spin_lock_irqsave
#define spin_lock_irqsave(l,f) spin_lock(l)
#undef spin_unlock_irqrestore
#define spin_unlock_irqrestore(l,f) spin_unlock(l)
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
@ -654,4 +678,89 @@ GENERIC_OBJECT_FUNCTIONS(generic);
# define traced_writeunlock(spinlock,flags) write_unlock_irqrestore(spinlock,flags)
#endif
extern void set_button(struct generic_switch *sw, bool val);
extern void set_led_on(struct generic_switch *sw, bool val);
extern void set_led_off(struct generic_switch *sw, bool val);
/////////////////////////////////////////////////////////////////////////
// metadata descriptions
/* The idea is to describe your C structures in such a way that
* transfers to disk or over a network become self-describing.
*
* In essence, this is a kind of version-independent marshalling.
*
* Advantage:
* When you extend your original C struct (and of course update the
* corresponding meta structure), old data on disk (or network peers
* running an old version of your program) will remain valid.
* Upon read, newly added fields missing in the old version will simply
* not be filled in and therefore remain zeroed (provided that you
* initially clear your structures via memset() / initializers / etc).
* Note that this works only if you never rename or remove existing
* fields; you should only add new ones.
* [TODO: add macros for description of ignored / renamed fields to
* overcome this limitation]
* You may increase the size of integers, for example from 32bit to 64bit
* or even higher; sign extension will be automatically carried out
* when necessary.
* [TODO; NYI]
* Also, you may change the order of fields, because the metadata interpreter
* will check each field individually; field offsets are automatically
* maintained.
*
* Disadvantage: this adds some (small) overhead.
*/
#define MAX_FIELD_LEN 24
/* Kind of a structure member described by a struct meta entry.
 *
 * As far as free_meta() is concerned:
 *   FIELD_SUB    - sub-structure embedded in the parent (not a pointer)
 *   FIELD_REF    - pointer to a separately allocated object, kfree()d
 *   FIELD_STRING - pointer that is kfree()d
 * All other values are ignored by free_meta().
 * FIELD_DONE is the first enumerator and therefore has the value 0.
 *
 * NOTE(review): FIELD_RAW / FIELD_INT / FIELD_UINT are presumably plain
 * in-place data (raw bytes, signed and unsigned integers) -- confirm
 * against the marshalling code.
 */
enum field_type {
FIELD_DONE,
FIELD_REF,
FIELD_SUB,
FIELD_STRING,
FIELD_RAW,
FIELD_INT,
FIELD_UINT,
};
/* Description of one member of a C structure.
 *
 * Descriptions are used as arrays ("meta tables"); find_meta() and
 * free_meta() walk such an array until they reach an entry whose
 * field_name is empty, so every table must end with such an entry.
 */
struct meta {
/* member name; find_meta() compares at most MAX_FIELD_LEN characters */
char field_name[MAX_FIELD_LEN];
/* one of enum field_type */
int field_type;
/* filled in from sizeof() by the META_INI* macros */
int field_size;
/* filled in from offsetof() by the META_INI* macros */
int field_offset;
/* meta table of the referenced / embedded structure, if any */
const struct meta *field_ref;
};
/* Initializer helpers for struct meta entries.
 *
 * NAME is the member name inside STRUCT; it is stringified into
 * field_name, and field_offset is taken from offsetof(STRUCT, NAME).
 *
 *   META_INI(NAME,STRUCT,TYPE)    - member of kind TYPE (enum field_type);
 *                                   field_size is the size of the member.
 *   META_INI_REF(NAME,STRUCT,REF) - FIELD_REF member; field_size is the
 *                                   size of the object pointed to, REF is
 *                                   stored in field_ref.
 *   META_INI_SUB(NAME,STRUCT,SUB) - FIELD_SUB member; field_size is the
 *                                   size of the member, SUB is stored in
 *                                   field_ref.
 *
 * The variants with a leading underscore expand to the bare designated
 * initializers without the surrounding braces.
 */
#define _META_INI(NAME,STRUCT,TYPE) \
.field_name = #NAME, \
.field_type = TYPE, \
.field_size = sizeof(((STRUCT*)NULL)->NAME), \
.field_offset = offsetof(STRUCT, NAME) \
#define META_INI(NAME,STRUCT,TYPE) { _META_INI(NAME,STRUCT,TYPE) }
#define _META_INI_REF(NAME,STRUCT,REF) \
.field_name = #NAME, \
.field_type = FIELD_REF, \
.field_size = sizeof(*(((STRUCT*)NULL)->NAME)), \
.field_offset = offsetof(STRUCT, NAME), \
.field_ref = REF
#define META_INI_REF(NAME,STRUCT,REF) { _META_INI_REF(NAME,STRUCT,REF) }
#define _META_INI_SUB(NAME,STRUCT,SUB) \
.field_name = #NAME, \
.field_type = FIELD_SUB, \
.field_size = sizeof(((STRUCT*)NULL)->NAME), \
.field_offset = offsetof(STRUCT, NAME), \
.field_ref = SUB
#define META_INI_SUB(NAME,STRUCT,SUB) { _META_INI_SUB(NAME,STRUCT,SUB) }
extern const struct meta *find_meta(const struct meta *meta, const char *field_name);
extern void free_meta(void *data, const struct meta *meta);
#endif

View File

@ -1,4 +1,12 @@
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
/* Definitions for logfile format.
*
* This is meant for sharing between different transaction logger variants,
* and/or for sharing with userspace tools (e.g. logfile analyzers).
* TODO: factor out kernelspace issues.
*/
#ifndef LOG_FORMAT_H
#define LOG_FORMAT_H
@ -182,7 +190,7 @@ bool log_finalize(struct log_status *logst, int len, void (*endio)(struct generi
DATA_PUT(data, offset, (char)0); // spare
DATA_PUT(data, offset, (short)0); // spare
DATA_PUT(data, offset, (int)0); // spare
now = CURRENT_TIME; // when the log entry was ready.
get_lamport(&now); // when the log entry was ready.
DATA_PUT(data, offset, now.tv_sec);
DATA_PUT(data, offset, now.tv_nsec);

137
mars.h
View File

@ -3,25 +3,34 @@
#define MARS_H
#include <linux/list.h>
#include <linux/semaphore.h>
#include <asm/spinlock.h>
#include <asm/atomic.h>
#define MARS_DELAY /**/
//#define MARS_DELAY msleep(20000)
#define MARS_FATAL "MARS_FATAL " __BASE_FILE__ ": "
#define MARS_ERROR "MARS_ERROR " __BASE_FILE__ ": "
#define MARS_INFO "MARS_INFO " __BASE_FILE__ ": "
#define MARS_DEBUG "MARS_DEBUG " __BASE_FILE__ ": "
#define MARS_FATAL "MARS_FATAL "
#define MARS_ERROR "MARS_ERROR "
#define MARS_INFO "MARS_INFO "
#define MARS_DEBUG "MARS_DEBUG "
#define _MARS_FMT(fmt) "[%s] " __BASE_FILE__ " %d %s(): " fmt, current->comm, __LINE__, __FUNCTION__
//#define _MARS_FMT(fmt) _BRICK_FMT(fmt)
#define MARS_FAT(fmt, args...) do { printk(MARS_FATAL "%s(): " fmt, __FUNCTION__, ##args); MARS_DELAY; } while (0)
#define MARS_ERR(fmt, args...) do { printk(MARS_ERROR "%s(): " fmt, __FUNCTION__, ##args); MARS_DELAY; } while (0)
#define MARS_INF(fmt, args...) do { printk(MARS_INFO "%s(): " fmt, __FUNCTION__, ##args); } while (0)
#define MARS_FAT(fmt, args...) do { printk(MARS_FATAL _MARS_FMT(fmt), ##args); MARS_DELAY; } while (0)
#define MARS_ERR(fmt, args...) do { printk(MARS_ERROR _MARS_FMT(fmt), ##args); MARS_DELAY; } while (0)
#define MARS_INF(fmt, args...) do { printk(MARS_INFO _MARS_FMT(fmt), ##args); } while (0)
#ifdef MARS_DEBUGGING
#define MARS_DBG(fmt, args...) do { printk(MARS_DEBUG "%s(): " fmt, __FUNCTION__, ##args); } while (0)
#define MARS_DBG(fmt, args...) do { printk(MARS_DEBUG _MARS_FMT(fmt), ##args); } while (0)
#else
#define MARS_DBG(args...) /**/
#endif
#ifdef IO_DEBUGGING
#define MARS_IO MARS_DBG
#else
#define MARS_IO(args...) /*empty*/
#endif
#define BRICK_OBJ_MREF 0
#define BRICK_OBJ_NR 1
@ -66,6 +75,7 @@ struct mref_object_layout {
/* maintained by the ref implementation, readable for callers */ \
int ref_flags; \
int ref_rw; \
int ref_id; /* not mandatory; may be used for identification */ \
/* maintained by the ref implementation, incrementable for \
* callers (but not decrementable! use ref_put()) */ \
atomic_t ref_count; \
@ -90,6 +100,9 @@ struct mars_info {
#define MARS_BRICK(PREFIX) \
GENERIC_BRICK(PREFIX); \
struct list_head brick_link; \
const char *brick_path; \
struct mars_global *global; \
struct mars_brick {
MARS_BRICK(mars);
@ -214,8 +227,6 @@ static const struct generic_aspect_type *BRICK##_aspect_types[BRICK_OBJ_NR] = {
MARS_ERR("%d: list_head " #head " (%p) not empty\n", __LINE__, head); \
} \
#endif
#define CHECK_PTR(ptr,label) \
if (unlikely(!(ptr))) { \
MARS_FAT("%d: ptr " #ptr " is NULL\n", __LINE__); \
@ -227,3 +238,109 @@ static const struct generic_aspect_type *BRICK##_aspect_types[BRICK_OBJ_NR] = {
MARS_FAT("%d: condition " #ptr " is VIOLATED\n", __LINE__); \
goto label; \
}
extern const struct meta mars_info_meta[];
extern const struct meta mars_mref_meta[];
/////////////////////////////////////////////////////////////////////////
extern struct mars_global *mars_global;
extern void mars_trigger(void);
extern void mars_power_button(struct mars_brick *brick, bool val);
extern void mars_power_led_on(struct mars_brick *brick, bool val);
extern void mars_power_led_off(struct mars_brick *brick, bool val);
/////////////////////////////////////////////////////////////////////////
#ifdef _STRATEGY // call this only in strategy bricks, never in ordinary bricks
#define MARS_ARGV_MAX 4
extern char *my_id(void);
/* Member list of a directory entry ("dent") record.
 *
 * The macro expands to the members only, so that it can be placed at the
 * start of a structure; TYPE is the struct tag used for the d_parent
 * pointer.
 *
 * d_error is post-incremented by MARS_ERR_ONCE(), which emits its
 * message only while the counter is still zero.
 */
#define MARS_DENT(TYPE) \
struct list_head sub_link; \
struct TYPE *d_parent; \
char *d_argv[MARS_ARGV_MAX]; /* for internal use, will be automatically deallocated*/ \
char *d_args; /* ditto uninterpreted */ \
char *d_name; /* current path component */ \
char *d_rest; /* some "meaningful" rest of d_name*/ \
char *d_path; /* full absolute path */ \
int d_namelen; \
int d_pathlen; \
int d_depth; \
unsigned int d_type; /* from readdir() => often DT_UNKNOWN => don't rely on it, use new_stat.mode instead */ \
int d_class; /* for pre-grouping order */ \
int d_serial; /* for pre-grouping order */ \
int d_version; /* dynamic programming per call of mars_ent_work() */ \
char d_error; \
struct kstat new_stat; \
struct kstat old_stat; \
char *new_link; \
char *old_link; \
void *d_private;
/* Plain directory entry: exactly the MARS_DENT() members, nothing else. */
struct mars_dent {
MARS_DENT(mars_dent);
};
extern const struct meta mars_timespec_meta[];
extern const struct meta mars_kstat_meta[];
extern const struct meta mars_dent_meta[];
/* Global bookkeeping shared by the strategy code.
 *
 * NOTE(review): the roles below are inferred from the member names and
 * types only -- confirm against mars_generic.c / mars_light.c.
 */
struct mars_global {
/* list anchor; presumably chains mars_dent records */
struct list_head dent_anchor;
/* list anchor; presumably chains bricks via their brick_link member */
struct list_head brick_anchor;
struct generic_switch global_power;
/* presumably protects the two lists above */
struct semaphore mutex;
volatile bool main_trigger;
wait_queue_head_t main_event;
//void *private;
};
typedef int (*mars_dent_checker)(const char *path, const char *name, int namlen, unsigned int d_type, int *prefix, int *serial);
typedef int (*mars_dent_worker)(struct mars_global *global, struct mars_dent *dent, bool direction);
extern int mars_dent_work(struct mars_global *global, char *dirname, int allocsize, mars_dent_checker checker, mars_dent_worker worker, void *buf, int maxdepth);
extern struct mars_dent *_mars_find_dent(struct mars_global *global, const char *path);
extern struct mars_dent *mars_find_dent(struct mars_global *global, const char *path);
extern void mars_dent_free(struct mars_dent *dent);
extern void mars_dent_free_all(struct list_head *anchor);
extern struct mars_brick *mars_find_brick(struct mars_global *global, const void *brick_type, const char *path);
extern struct mars_brick *mars_make_brick(struct mars_global *global, const void *_brick_type, const char *path, const char *name);
#define MARS_ERR_ONCE(dent, args...) if (!dent->d_error++) MARS_ERR(args)
/* Kludge: our kernel threads will have no mm context, but need one
 * for stuff like ioctx_alloc() / aio_setup_ring() etc
 * which expect userspace resources.
 * We fake one.
 * TODO: factor out the userspace stuff from AIO such that
 * this fake is no longer necessary.
 * Even better: replace do_mmap() in AIO stuff by something
 * more friendly to kernelspace apps.
 *
 * Installs &init_mm as current->mm when the calling task has no mm.
 * Must be undone with cleanup_mm() before the task exits.
 *
 * Declared "static inline" because this definition lives in a header:
 * a plain "inline" would emit an external definition in every
 * translation unit under gnu89 semantics (duplicate symbols at link
 * time) and none at all under C99 semantics (unresolved symbol).
 */
static inline void fake_mm(void)
{
	if (!current->mm) {
		current->mm = &init_mm;
	}
}
/* Cleanup faked mm, otherwise do_exit() will try to destroy
 * the wrong thing....
 *
 * Resets current->mm to NULL, but only when it currently points to
 * init_mm; any other mm is left alone.
 *
 * Declared "static inline" because this definition lives in a header:
 * a plain "inline" would emit an external definition in every
 * translation unit under gnu89 semantics (duplicate symbols at link
 * time) and none at all under C99 semantics (unresolved symbol).
 */
static inline void cleanup_mm(void)
{
	if (current->mm == &init_mm) {
		current->mm = NULL;
	}
}
extern int mars_mkdir(const char *path);
extern int mars_symlink(const char *oldpath, const char *newpath, const struct timespec *stamp);
extern int mars_rename(const char *oldpath, const char *newpath);
#endif
#endif

509
mars_client.c Normal file
View File

@ -0,0 +1,509 @@
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
// Client brick (just for demonstration)
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
//#define IO_DEBUGGING
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include "mars.h"
///////////////////////// own type definitions ////////////////////////
#include "mars_client.h"
///////////////////////// own helper functions ////////////////////////
static int thread_count = 0;
/*
 * Shut down the write side of the output's socket, if there is one, and
 * forget the socket pointer.
 * Note that the socket is only shut down here; the sock_release() call
 * is commented out.
 */
static void _kill_socket(struct client_output *output)
{
	if (!output->socket)
		return;

	MARS_DBG("shutdown socket\n");
	kernel_sock_shutdown(output->socket, SHUT_WR);
	//sock_release(output->socket);
	output->socket = NULL;
}
/*
 * Stop the kernel thread recorded in ti, if any.
 * ti->thread itself is left unchanged.
 */
static void _kill_thread(struct client_threadinfo *ti)
{
	if (!ti->thread)
		return;

	kthread_stop(ti->thread);
}
/*
 * Establish the connection to the remote side.
 *
 * On the first call (output->host not yet set), 'str' is duplicated and
 * split at its first '+': the part before it becomes output->host, the
 * part after it becomes output->path (pointing into the same buffer).
 * Afterwards a socket is created for output->host, and the commands
 * CMD_CONNECT (carrying output->path) and CMD_GETINFO are sent.
 *
 * Returns the (non-negative) status of the last send on success, or a
 * negative errno:
 *   -ENOMEM  'str' could not be duplicated
 *   -EINVAL  'str' does not contain a '+'
 *   other    whatever the mars_* network helpers reported
 * On any failure the socket is shut down via _kill_socket().
 */
static int _connect(struct client_output *output, const char *str)
{
	struct sockaddr_storage sockaddr = {};
	int status;

	if (!output->host) {
		output->host = kstrdup(str, GFP_MARS);
		if (!output->host) {
			/* allocation failure, not a malformed argument */
			status = -ENOMEM;
			goto done;
		}
		output->path = strchr(output->host, '+');
		if (!output->path) {
			kfree(output->host);
			output->host = NULL;
			status = -EINVAL;
			goto done;
		}
		/* cut the string in two: "host\0path" */
		*output->path++ = '\0';
	}

	status = mars_create_sockaddr(&sockaddr, output->host);
	if (unlikely(status < 0))
		goto done;

	status = mars_create_socket(&output->socket, &sockaddr, false);
	if (unlikely(status < 0)) {
		output->socket = NULL;
		goto done;
	}

	{
		struct mars_cmd cmd = {
			.cmd_code = CMD_CONNECT,
			.cmd_str1 = output->path,
		};

		status = mars_send_struct(&output->socket, &cmd, mars_cmd_meta);
		if (unlikely(status < 0))
			goto done;
	}
	{
		struct mars_cmd cmd = {
			.cmd_code = CMD_GETINFO,
		};

		status = mars_send_struct(&output->socket, &cmd, mars_cmd_meta);
	}

done:
	if (status < 0) {
		MARS_INF("cannot connect to remote host '%s' (status = %d) -- retrying\n", output->host ? output->host : "NULL", status);
		_kill_socket(output);
	}
	return status;
}
////////////////// own brick / input / output operations //////////////////
/*
 * Copy the info record stored in the output into *info.
 *
 * Waits (interruptibly, for at most 60 seconds) until output->got_info
 * is set.  Returns 0 when the record is available and 'info' is non-NULL,
 * -EIO otherwise.
 */
static int client_get_info(struct client_output *output, struct mars_info *info)
{
	int status;
#if 0
	status = _connect(output, output->brick->brick_name);
	if (status < 0)
		goto done;
#endif
	wait_event_interruptible_timeout(output->info_event, output->got_info, 60 * HZ);

	if (output->got_info && info) {
		memcpy(info, &output->info, sizeof(*info));
		status = 0;
	} else {
		status = -EIO;
	}
//done:
	return status;
}
/*
 * Take a reference on mref, allocating a data buffer when the caller
 * did not supply one.
 *
 * The request length is first clipped so that the transfer does not
 * cross a page boundary.  When mref->ref_data is NULL, a buffer of
 * ref_len bytes is allocated, marked for deallocation in
 * client_ref_put(), and ref_flags is reset to 0.
 *
 * Returns 0 on success, -EILSEQ when the aspect cannot be obtained,
 * -ENOMEM when the buffer cannot be allocated.
 */
static int client_ref_get(struct client_output *output, struct mref_object *mref)
{
	struct client_mref_aspect *aspect;
	int limit;

	_CHECK_ATOMIC(&mref->ref_count, !=, 0);

	/* Limit transfers to page boundaries.
	 * Currently, this is more restrictive than necessary.
	 * TODO: improve performance by doing better when possible.
	 * This needs help from the server in some efficient way.
	 */
	limit = PAGE_SIZE - (mref->ref_pos & (PAGE_SIZE-1));
	if (limit < mref->ref_len)
		mref->ref_len = limit;

	if (mref->ref_data) {
		/* caller supplied the buffer: only count the reference */
		atomic_inc(&mref->ref_count);
		return 0;
	}

	/* buffered IO: we have to provide the buffer ourselves */
	aspect = client_mref_get_aspect(output, mref);
	if (!aspect)
		return -EILSEQ;

	mref->ref_data = kmalloc(mref->ref_len, GFP_MARS);
	if (!mref->ref_data)
		return -ENOMEM;

	aspect->do_dealloc = true;
	mref->ref_flags = 0;

	atomic_inc(&mref->ref_count);
	return 0;
}
/*
 * Drop one reference on mref.  When the last reference goes away, the
 * data buffer is freed (if client_ref_get() had allocated it) and the
 * mref object itself is released.
 */
static void client_ref_put(struct client_output *output, struct mref_object *mref)
{
	struct client_mref_aspect *aspect;

	CHECK_ATOMIC(&mref->ref_count, 1);

	if (atomic_dec_and_test(&mref->ref_count)) {
		aspect = client_mref_get_aspect(output, mref);
		if (aspect && aspect->do_dealloc)
			kfree(mref->ref_data);
		client_free_mref(mref);
	}
}
/*
 * Submit mref for IO.
 *
 * Normal path: an additional reference is taken, the request gets the
 * next id of this output, is appended to output->mref_list under
 * output->lock, and waiters on output->event are woken up.
 *
 * When the aspect cannot be obtained, the request is completed right
 * away: the callback is invoked with cb_error = -EINVAL and
 * client_ref_put() is called on the mref.
 */
static void client_ref_io(struct client_output *output, struct mref_object *mref)
{
	struct client_mref_aspect *aspect = client_mref_get_aspect(output, mref);
	unsigned long flags;

	if (unlikely(!aspect)) {
		struct generic_callback *cb;
		int error = -EINVAL;

		MARS_ERR("IO error = %d\n", error);
		cb = mref->ref_cb;
		cb->cb_error = error;
		cb->cb_fn(cb);
		client_ref_put(output, mref);
		return;
	}

	atomic_inc(&mref->ref_count);

	traced_lock(&output->lock, flags);
	aspect->object->ref_id = ++output->last_id;
	list_add_tail(&aspect->io_head, &output->mref_list);
	traced_unlock(&output->lock, flags);

	wake_up_interruptible(&output->event);
}
/* Kernel thread: read command packets from the socket and dispatch them.
 *
 * Runs until kthread_should_stop() or until output->socket becomes NULL.
 * Handled commands:
 *   CMD_CONNECT  - a negative cmd_int1 is treated as a failed remote connect
 *   CMD_CB       - completion of the request whose ref_id equals cmd_int1
 *   CMD_GETINFO  - stores the received info and wakes client_get_info()
 * Any receive error or unknown command terminates the thread.
 *
 * Returns the last status (negative on error).
 */
static int receiver_thread(void *data)
{
struct client_output *output = data;
int status = 0;
while (!kthread_should_stop() && output->socket) {
struct mars_cmd cmd = {};
struct list_head *tmp;
struct client_mref_aspect *mref_a = NULL;
struct mref_object *mref = NULL;
struct generic_callback *cb;
unsigned long flags;
status = mars_recv_struct(&output->socket, &cmd, mars_cmd_meta);
if (status < 0)
goto done;
switch (cmd.cmd_code) {
case CMD_CONNECT:
if (cmd.cmd_int1 < 0) {
status = cmd.cmd_int1;
MARS_ERR("remote connect failed, status = %d\n", status);
goto done;
}
break;
case CMD_CB:
/* Look up the pending request by id among those already sent. */
traced_lock(&output->lock, flags);
for (tmp = output->wait_list.next; tmp != &output->wait_list; tmp = tmp->next) {
mref_a = container_of(tmp, struct client_mref_aspect, io_head);
if (mref_a->object->ref_id == cmd.cmd_int1) {
mref = mref_a->object;
break;
}
}
traced_unlock(&output->lock, flags);
/* When nothing matched, mref_a may point at the last list entry;
 * only mref tells whether the lookup succeeded.
 */
if (!mref) {
MARS_ERR("unknown id = %d\n", cmd.cmd_int1);
status = -EBADR;
goto done;
}
/* NOTE(review): the lock is not held while receiving the payload;
 * presumably nothing else removes this entry meanwhile -- confirm
 * against the resubmit logic in sender_thread().
 */
status = mars_recv_cb(&output->socket, mref);
if (status < 0) {
MARS_ERR("interrupted data transfer, status = %d\n", status);
goto done;
}
traced_lock(&output->lock, flags);
list_del_init(&mref_a->io_head);
traced_unlock(&output->lock, flags);
/* Signal completion, then drop the reference taken in client_ref_io(). */
cb = mref->ref_cb;
cb->cb_fn(cb);
client_ref_put(output, mref);
break;
case CMD_GETINFO:
status = mars_recv_struct(&output->socket, &output->info, mars_info_meta);
if (status < 0) {
MARS_ERR("got bad info, status = %d\n", status);
goto done;
}
output->got_info = true;
wake_up_interruptible(&output->info_event);
break;
default:
MARS_ERR("got bad command %d, terminating.\n", cmd.cmd_code);
status = -EBADR;
goto done;
}
}
done:
if (status < 0)
MARS_ERR("receiver thread terminated with status = %d\n", status);
/* Mark ourselves as gone; sender_thread() starts a new receiver when it sees NULL. */
output->receiver.thread = NULL;
/* NOTE(review): the socket is only shut down and the pointer cleared here;
 * whether it gets released elsewhere cannot be seen from this function -- confirm.
 */
if (output->socket) {
MARS_INF("shutting down socket\n");
kernel_sock_shutdown(output->socket, SHUT_WR);
//msleep(1000);
output->socket = NULL;
}
return status;
}
/* Kernel thread: transmit queued requests to the remote side.
 *
 * Each loop iteration
 *  1. (re)connects when there is no socket, sleeping 5 seconds
 *     between failed attempts,
 *  2. starts a receiver thread when none is running,
 *  3. after a fresh connect, moves all requests which were already sent
 *     but not yet answered (wait_list) back to the front of mref_list,
 *  4. waits up to one second for work, then sends the first request of
 *     mref_list and parks it on wait_list until its answer arrives.
 *
 * When sending fails, the request is put back at the front of mref_list
 * and both the socket and the receiver are torn down, so that the next
 * iteration reconnects.
 *
 * Returns the last status (negative on error).
 */
static int sender_thread(void *data)
{
	struct client_output *output = data;
	struct client_brick *brick = output->brick;
	int status = 0;

	while (!kthread_should_stop()) {
		struct list_head *tmp;
		struct client_mref_aspect *mref_a;
		unsigned long flags;
		bool do_resubmit = false;

		if (unlikely(!output->socket)) {
			status = _connect(output, brick->brick_name);
			if (unlikely(status < 0)) {
				msleep(5000);
				continue;
			}
			do_resubmit = true;
		}

		if (unlikely(!output->receiver.thread)) {
			output->receiver.thread = kthread_create(receiver_thread, output, "mars_receiver%d", thread_count++);
			if (unlikely(IS_ERR(output->receiver.thread))) {
				MARS_ERR("cannot start receiver thread, status = %d\n", (int)PTR_ERR(output->receiver.thread));
				output->receiver.thread = NULL;
				msleep(5000);
				/* NOTE(review): do_resubmit is lost here; the next
				 * iteration sees a valid socket and will not resubmit.
				 */
				continue;
			}
			wake_up_process(output->receiver.thread);
		}

		if (do_resubmit) {
			/* Re-Submit any waiting requests:
			 * splice wait_list in front of mref_list and leave
			 * wait_list empty (no-op when wait_list is empty).
			 */
			traced_lock(&output->lock, flags);
			list_splice_init(&output->wait_list, &output->mref_list);
			traced_unlock(&output->lock, flags);
		}

		wait_event_interruptible_timeout(output->event, !list_empty(&output->mref_list), 1 * HZ);

		if (list_empty(&output->mref_list))
			continue;

		/* Move the first pending request over to wait_list before sending. */
		traced_lock(&output->lock, flags);
		tmp = output->mref_list.next;
		list_del(tmp);
		list_add(tmp, &output->wait_list);
		traced_unlock(&output->lock, flags);

		mref_a = container_of(tmp, struct client_mref_aspect, io_head);

		status = mars_send_mref(&output->socket, mref_a->object);
		if (unlikely(status < 0)) {
			// retry submission on next occasion..
			traced_lock(&output->lock, flags);
			list_del(&mref_a->io_head);
			list_add(&mref_a->io_head, &output->mref_list);
			traced_unlock(&output->lock, flags);

			MARS_ERR("sending failed, status = %d\n", status);

			_kill_socket(output);
			_kill_thread(&output->receiver);
			/* Forcibly mark as dead, in any case.
			 * In consequence, a new connection will be tried thereafter.
			 */
			output->receiver.thread = NULL;
			continue;
		}
	}
//done:
	if (status < 0)
		MARS_ERR("sender thread terminated with status = %d\n", status);

	_kill_socket(output);
	_kill_thread(&output->receiver);
	output->sender.thread = NULL;
	return status;
}
/* Power switch of the client brick.
 *
 * Button on:  start the sender thread and report led_on.
 * Button off: stop the sender thread; led_off is reported only when
 *             the thread pointer has been cleared.
 *
 * Returns 0, or the kthread_create() error when the sender thread
 * cannot be started.
 */
static int client_switch(struct client_brick *brick)
{
	struct client_output *output = brick->outputs[0];
	int status;

	if (!brick->power.button) {
		mars_power_led_on((void*)brick, false);
		_kill_thread(&output->sender);
		mars_power_led_off((void*)brick, !output->sender.thread);
		return 0;
	}

	mars_power_led_off((void*)brick, false);

	output->sender.thread = kthread_create(sender_thread, output, "mars_sender%d", thread_count++);
	if (unlikely(IS_ERR(output->sender.thread))) {
		status = PTR_ERR(output->sender.thread);
		MARS_ERR("cannot start sender thread, status = %d\n", status);
		output->sender.thread = NULL;
		return status;
	}

	wake_up_process(output->sender.thread);
	mars_power_led_on((void*)brick, true);
	return 0;
}
//////////////// object / aspect constructors / destructors ///////////////
/* Aspect constructor: prepare the list linkage used for queueing
 * the request on mref_list / wait_list. Always returns 0.
 */
static int client_mref_aspect_init_fn(struct generic_aspect *_ini, void *_init_data)
{
	struct client_mref_aspect *mref_a = (void*)_ini;

	INIT_LIST_HEAD(&mref_a->io_head);
	return 0;
}
/* Aspect destructor: nothing to release; the cast only silences
 * unused-variable warnings.
 */
static void client_mref_aspect_exit_fn(struct generic_aspect *_ini, void *_init_data)
{
struct client_mref_aspect *ini = (void*)_ini;
(void)ini;
}
/* NOTE(review): presumably generates the client_* helpers used in this file
 * (e.g. client_mref_get_aspect, client_aspect_types) -- the macro is defined elsewhere.
 */
MARS_MAKE_STATICS(client);
////////////////////// brick constructors / destructors ////////////////////
/* Brick constructor: no brick-level state to set up. Always returns 0. */
static int client_brick_construct(struct client_brick *brick)
{
return 0;
}
/* Output constructor: set up the lock, both request queues and all
 * wait queues of a client output. Always returns 0.
 */
static int client_output_construct(struct client_output *output)
{
	/* request queues and their lock */
	spin_lock_init(&output->lock);
	INIT_LIST_HEAD(&output->wait_list);
	INIT_LIST_HEAD(&output->mref_list);

	/* wait queues */
	init_waitqueue_head(&output->info_event);
	init_waitqueue_head(&output->receiver.event);
	init_waitqueue_head(&output->sender.event);
	init_waitqueue_head(&output->event);

	return 0;
}
static int client_output_destruct(struct client_output *output)
{
if (output->host)
kfree(output->host);
return 0;
}
///////////////////////// static structs ////////////////////////
/* Brick-level operations: only the power switch is implemented. */
static struct client_brick_ops client_brick_ops = {
.brick_switch = client_switch,
};
/* Operations offered on the single output of a client brick. */
static struct client_output_ops client_output_ops = {
.make_object_layout = client_make_object_layout,
.mars_get_info = client_get_info,
.mref_get = client_ref_get,
.mref_put = client_ref_put,
.mref_io = client_ref_io,
};
/* Input type descriptor; the brick type below declares max_inputs = 0. */
const struct client_input_type client_input_type = {
.type_name = "client_input",
.input_size = sizeof(struct client_input),
};
static const struct client_input_type *client_input_types[] = {
&client_input_type,
};
/* Output type descriptor, wiring constructor, destructor and operations. */
const struct client_output_type client_output_type = {
.type_name = "client_output",
.output_size = sizeof(struct client_output),
.master_ops = &client_output_ops,
.output_construct = &client_output_construct,
.output_destruct = &client_output_destruct,
.aspect_types = client_aspect_types,
.layout_code = {
[BRICK_OBJ_MREF] = LAYOUT_ALL,
}
};
static const struct client_output_type *client_output_types[] = {
&client_output_type,
};
/* Brick type descriptor: no inputs, exactly one output. Exported for other modules. */
const struct client_brick_type client_brick_type = {
.type_name = "client_brick",
.brick_size = sizeof(struct client_brick),
.max_inputs = 0,
.max_outputs = 1,
.master_ops = &client_brick_ops,
.default_input_types = client_input_types,
.default_output_types = client_output_types,
.brick_construct = &client_brick_construct,
};
EXPORT_SYMBOL_GPL(client_brick_type);
////////////////// module init stuff /////////////////////////
/* Module entry point: register the client brick type; returns the registration status. */
static int __init init_client(void)
{
MARS_INF("init_client()\n");
return client_register_brick_type();
}
/* Module exit point: unregister the client brick type. */
static void __exit exit_client(void)
{
MARS_INF("exit_client()\n");
client_unregister_brick_type();
}
/* Module metadata and init/exit hooks. */
MODULE_DESCRIPTION("MARS client brick");
MODULE_AUTHOR("Thomas Schoebel-Theuer <tst@1und1.de>");
MODULE_LICENSE("GPL");
module_init(init_client);
module_exit(exit_client);

45
mars_client.h Normal file
View File

@ -0,0 +1,45 @@
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
#ifndef MARS_CLIENT_H
#define MARS_CLIENT_H
#include "mars_net.h"
/* Per-request state of the client brick. */
struct client_mref_aspect {
GENERIC_ASPECT(mref);
struct list_head io_head; // linkage into client_output.mref_list or .wait_list
bool do_dealloc; // ref_data was allocated by client_ref_get() and must be freed on last put
};
/* The client brick itself; it carries no members beyond the generic ones. */
struct client_brick {
MARS_BRICK(client);
};
/* Input of a client brick; it carries no members beyond the generic ones. */
struct client_input {
MARS_INPUT(client);
};
/* Bookkeeping for one helper thread (sender or receiver). */
struct client_threadinfo {
struct task_struct *thread; // NULL while no thread is running
wait_queue_head_t event; // initialised in client_output_construct()
};
/* State of the single output of a client brick: the connection to the
 * remote side plus the queues of requests travelling over it.
 */
struct client_output {
MARS_OUTPUT(client);
spinlock_t lock; // taken around updates of mref_list, wait_list and last_id
struct list_head mref_list; // requests waiting to be sent
struct list_head wait_list; // requests sent, waiting for their CMD_CB answer
wait_queue_head_t event; // wakes the sender thread when mref_list gets work
int last_id; // last ref_id handed out by client_ref_io()
struct socket *socket; // NULL while disconnected
char *host; // freed in client_output_destruct()
char *path; // NOTE(review): not referenced by the code visible here
struct client_threadinfo sender;
struct client_threadinfo receiver;
struct mars_info info; // last info record received via CMD_GETINFO
wait_queue_head_t info_event; // wakes client_get_info()
bool got_info; // set once info is valid
};
MARS_TYPES(client);
#endif

572
mars_copy.c Normal file
View File

@ -0,0 +1,572 @@
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
// Copy brick (just for demonstration)
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
//#define IO_DEBUGGING
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include "mars.h"
///////////////////////// own type definitions ////////////////////////
#include "mars_copy.h"
///////////////////////// own helper functions ////////////////////////
/* TODO:
* The clash logic is untested / alpha stage (Feb. 2011).
*
* For now, the output is never used, so this cannot do harm.
*
* In order to get the output really working / enterprise grade,
* some larger test effort should be invested.
*/
/* Flag a clash: set bit 0 of brick->clash, set the trigger and wake the
 * waiter on brick->event (the copy thread waits there).
 */
static inline
void _clash(struct copy_brick *brick)
{
brick->trigger = true;
set_bit(0, &brick->clash);
wake_up_interruptible(&brick->event);
}
/* Atomically reset the clash flag.
 * Returns nonzero when a clash had been pending, 0 otherwise.
 */
static inline
int _clear_clash(struct copy_brick *brick)
{
	return test_and_clear_bit(0, &brick->clash);
}
/* Current semantics:
*
* All writes are always going to the original input A. They are _not_
* replicated to B.
*
* In order to get B really uptodate, you have to replay the right
* transaction logs there (at the right time).
* [If you had no writes on A at all during the copy, of course
* this is not necessary]
*
* When optimize_mode is on, reads can utilize the already copied
* region from B, but only as long as this region has not been
* invalidated by writes (indicated by low_dirty).
*
* TODO: implement replicated writes, together with some transaction
* replay logic applying the transaction logs _only_ after
* crashes during inconsistency caused by partial replication of writes.
*/
/* Select the input which shall serve a regular IO request.
 *
 * Returns INPUT_A_IO or INPUT_B_IO.
 *
 * Without optimize_mode, or once low_dirty is set, everything goes to A.
 * Otherwise:
 *  - writes always go to A. A write starting before copy_end marks the
 *    copied region as dirty (low_dirty); if it additionally reaches
 *    beyond copy_start, a clash is raised so that the copy logic
 *    restarts its in-flight window.
 *  - reads lying completely below copy_start are served from B,
 *    all other reads from A.
 *
 * NOTE(review): the result depends on mutable brick state, so repeated
 * calls for the same mref may yield different inputs.
 */
static
int _determine_input(struct copy_brick *brick, struct mref_object *mref)
{
	int rw;
	int below;
	int behind;
	loff_t ref_end;

	if (!brick->optimize_mode || brick->low_dirty)
		return INPUT_A_IO;

	ref_end = mref->ref_pos + mref->ref_len;
	below = ref_end <= brick->copy_start;
	behind = !brick->copy_end || mref->ref_pos >= brick->copy_end;
	rw = mref->ref_may_write | mref->ref_rw;
	if (rw) {
		if (!behind) {
			brick->low_dirty = true;
			if (!below) {
				/* _clash() already wakes brick->event */
				_clash(brick);
			}
		}
		return INPUT_A_IO;
	}

	return below ? INPUT_B_IO : INPUT_A_IO;
}
/* Map a byte position to its slot in brick->state[] / brick->table[]:
 * the page number modulo MAX_COPY_PARA.
 */
#define MAKE_INDEX(pos) (((pos) / PAGE_SIZE) % MAX_COPY_PARA)
/* Completion callback for the copy requests created by _make_mref().
 *
 * On success the finished mref is stored into brick->table[index][queue],
 * where _next_state() picks it up. IO errors are recorded in
 * brick->state[index]. In every case except a corrupt callback, the
 * in-flight counter is decremented and the copy thread is woken.
 *
 * NOTE(review): a finished mref is only stored while state[index] > 0;
 * in all other paths it is neither stored nor put here -- confirm who
 * releases it.
 */
static
void copy_endio(struct generic_callback *cb)
{
	struct copy_mref_aspect *mref_a;
	struct mref_object *mref;
	struct copy_brick *brick;
	int index;
	int queue;

	mref_a = cb->cb_private;
	CHECK_PTR(mref_a, err);
	mref = mref_a->object;
	CHECK_PTR(mref, err);
	brick = mref_a->brick;
	CHECK_PTR(brick, err);

	queue = mref_a->queue;
	index = MAKE_INDEX(mref->ref_pos);
	MARS_IO("queue = %d index = %d pos = %lld status = %d\n", queue, index, mref->ref_pos, cb->cb_error);
	if (unlikely(queue < 0 || queue >= 2)) {
		MARS_ERR("bad queue %d\n", queue);
		_clash(brick);
		goto exit;
	}
	if (unlikely(brick->table[index][queue])) {
		/* report the entry occupying the slot, not the address of the table row */
		MARS_ERR("table corruption at %d %d (%p => %p)\n", index, queue, brick->table[index][queue], mref);
		_clash(brick);
		brick->state[index] = -EINVAL;
		goto exit;
	}
	if (unlikely(cb->cb_error < 0)) {
		MARS_ERR("IO error %d on index %d, old state =%d\n", cb->cb_error, index, brick->state[index]);
		brick->state[index] = cb->cb_error;
	} else if (likely(brick->state[index] > 0)) {
		brick->table[index][queue] = mref;
	}

exit:
	atomic_dec(&brick->copy_flight);
	brick->trigger = true;
	wake_up_interruptible(&brick->event);
	return;

err:
	MARS_FAT("cannot handle callback\n");
}
/* Create and submit one copy request.
 *
 * @index: table slot (used for logging only)
 * @queue: 0 = input A (INPUT_A_COPY), nonzero = input B (INPUT_B_COPY)
 * @data:  buffer to use, or NULL to let the lower brick provide one
 * @pos:   byte position; the length is limited to the end of the page
 *         and to copy_end
 * @rw:    0 = read, 1 = write
 *
 * Completion is reported via copy_endio().
 *
 * Returns -1 when a clash is pending or copy_end is 0, -ENOMEM when
 * the mref or its aspect cannot be obtained, otherwise the status of
 * mref_get on the selected input.
 */
static
int _make_mref(struct copy_brick *brick, int index, int queue, void *data, loff_t pos, int rw)
{
	struct mref_object *mref;
	struct copy_mref_aspect *mref_a;
	struct copy_input *input;
	loff_t tmp_pos;
	int len;
	int status = -1;

	tmp_pos = brick->copy_end;
	if (brick->clash || !tmp_pos)
		goto done;

	mref = copy_alloc_mref(brick->outputs[0], &brick->mref_object_layout);
	status = -ENOMEM;
	if (unlikely(!mref))
		goto done;

	mref_a = copy_mref_get_aspect(brick->outputs[0], mref);
	if (unlikely(!mref_a)) {
		/* release via the object allocator, as in the error path below */
		mars_free_mref(mref);
		goto done;
	}

	mref_a->brick = brick;
	mref_a->queue = queue;
	mref->ref_may_write = rw;
	mref->ref_rw = rw;
	mref->ref_data = data;
	mref->ref_pos = pos;
	len = PAGE_SIZE - (pos & (PAGE_SIZE-1));
	if (pos + len > tmp_pos) {
		len = tmp_pos - pos;
	}
	mref->ref_len = len;
	mref->_ref_cb.cb_private = mref_a;
	mref->_ref_cb.cb_fn = copy_endio;
	mref->ref_cb = &mref->_ref_cb;

	input = queue ? brick->inputs[INPUT_B_COPY] : brick->inputs[INPUT_A_COPY];
	status = GENERIC_INPUT_CALL(input, mref_get, mref);
	if (unlikely(status < 0)) {
		MARS_ERR("status = %d\n", status);
		mars_free_mref(mref);
		goto done;
	}

	MARS_IO("queue = %d index = %d pos = %lld len = %d rw = %d\n", queue, index, mref->ref_pos, mref->ref_len, rw);

	/* count before submitting: copy_endio() decrements */
	atomic_inc(&brick->copy_flight);
	GENERIC_INPUT_CALL(input, mref_io, mref);

done:
	return status;
}
/* Release the finished request stored in brick->table[index][queue],
 * if any, via mref_put on the copy input it came from (queue 0 = A,
 * otherwise B), and empty the slot.
 */
static
void _clear_mref(struct copy_brick *brick, int index, int queue)
{
	struct mref_object *held = brick->table[index][queue];
	struct copy_input *source;

	if (!held)
		return;

	source = queue ? brick->inputs[INPUT_B_COPY] : brick->inputs[INPUT_A_COPY];
	GENERIC_INPUT_CALL(source, mref_put, held);
	brick->table[index][queue] = NULL;
}
/* Advance the copy state machine for the page containing pos by one step.
 *
 * States (per table slot, see brick->state[]):
 *   START   - issue the read on A (and, in verify_mode, also on B)
 *   READ2   - verify_mode only: wait until the read from B has finished
 *   READ1   - wait for the read from A; in verify_mode, skip the write
 *             when both buffers are identical, else issue the write to B
 *   WRITE   - wait for the write (or the skipped write), then advance
 *             copy_start if this slot is the one at copy_start
 *   CLEANUP - release both requests and go back to START
 *
 * Returns 0 or a negative error code. On error the slot state is set
 * to -1 and a clash is raised. Leaving via "done" keeps the current
 * state unchanged (nothing to do yet).
 */
static
int _next_state(struct copy_brick *brick, loff_t pos)
{
struct mref_object *mref1;
struct mref_object *mref2;
int index = MAKE_INDEX(pos);
char state;
char next_state;
int i;
int status;
state = brick->state[index];
next_state = -1;
mref2 = NULL;
status = 0;
MARS_IO("index = %d state = %d pos = %lld\n", index, state, pos);
switch (state) {
case COPY_STATE_START:
/* Both slots must be empty before new reads may be started. */
if (brick->table[index][0] || brick->table[index][1]) {
MARS_ERR("index %d not startable\n", index);
status = -EPROTO;
goto done;
}
i = 0;
next_state = COPY_STATE_READ1;
if (brick->verify_mode) {
i = 1;
next_state = COPY_STATE_READ2;
}
/* Issue reads on queue i down to queue 0. */
for ( ; i >= 0; i--) {
status = _make_mref(brick, index, i, NULL, pos, 0);
if (status < 0) {
break;
}
}
break;
case COPY_STATE_READ2:
mref2 = brick->table[index][1];
if (!mref2) {
goto done;
}
/* fallthrough */
case COPY_STATE_READ1:
mref1 = brick->table[index][0];
if (!mref1) {
goto done;
}
/* mref2 is only non-NULL when coming from READ2 (verify_mode). */
if (mref2) {
int len = mref1->ref_len;
if (len == mref2->ref_len &&
!memcmp(mref1->ref_data, mref2->ref_data, len)) {
/* skip start of writing, goto final treatment of writeout */
next_state = COPY_STATE_WRITE;
brick->state[index] = next_state;
goto COPY_STATE_WRITE;
}
/* Data differs: free slot 1 so that the write request can occupy it. */
_clear_mref(brick, index, 1);
}
/* start writeout */
next_state = COPY_STATE_WRITE;
/* The write to B reuses the data buffer of the read from A. */
status = _make_mref(brick, index, 1, mref1->ref_data, pos, 1);
break;
case COPY_STATE_WRITE:
COPY_STATE_WRITE:
mref2 = brick->table[index][1];
/* Only the slot located exactly at copy_start may advance it. */
if (!mref2 || brick->copy_start != pos) {
MARS_IO("irrelevant\n");
goto done;
}
if (!brick->clash) {
brick->copy_start += mref2->ref_len;
MARS_IO("new copy_start = %lld\n", brick->copy_start);
/* Report progress every 1 GiB and upon completion. */
if (brick->copy_start > brick->copy_last + 1024 * 1024 * 1024 || brick->copy_start == brick->copy_end) {
brick->copy_last = brick->copy_start;
MARS_INF("'%s' copied %lld / %lld bytes (%lld%%)\n", brick->brick_name, brick->copy_last, brick->copy_end, brick->copy_end? brick->copy_last * 100 / brick->copy_end : 100);
}
}
next_state = COPY_STATE_CLEANUP;
/* fallthrough */
case COPY_STATE_CLEANUP:
_clear_mref(brick, index, 0);
_clear_mref(brick, index, 1);
next_state = COPY_STATE_START;
break;
default:
MARS_ERR("illegal state %d at index %d\n", state, index);
_clash(brick);
status = -EILSEQ;
}
brick->state[index] = next_state;
if (status < 0) {
brick->state[index] = -1;
MARS_ERR("status = %d\n", status);
_clash(brick);
}
done:
return status;
}
/* One pass of the copy engine.
 *
 * A pending clash is handled first: while copy IO is still in flight,
 * the clash is re-armed and the pass is abandoned after a short sleep.
 * Once nothing is in flight, every table slot is released and reset to
 * COPY_STATE_START.
 *
 * After that, up to "max" pages starting at copy_start are advanced by
 * one state each, where max shrinks with the amount of regular IO in
 * flight.
 */
static
void _run_copy(struct copy_brick *brick)
{
	int max;
	loff_t pos;
	int i;

	if (_clear_clash(brick)) {
		MARS_DBG("clash\n");
		if (atomic_read(&brick->copy_flight)) {
			/* wait until all pending copy IO has finished
			 */
			_clash(brick);
			MARS_DBG("re-clash\n");
			msleep(50);
			return;
		}
		for (i = 0; i < MAX_COPY_PARA; i++) {
			/* Slots may still hold finished requests: put them
			 * instead of just forgetting the pointers.
			 */
			_clear_mref(brick, i, 0);
			_clear_mref(brick, i, 1);
			brick->state[i] = COPY_STATE_START;
		}
	}

	max = MAX_COPY_PARA - atomic_read(&brick->io_flight) * 2;
	MARS_IO("max = %d\n", max);

	for (pos = brick->copy_start; pos < brick->copy_end; pos = ((pos / PAGE_SIZE) + 1) * PAGE_SIZE) {
		//MARS_IO("pos = %lld\n", pos);
		if (brick->clash || max-- <= 0)
			break;
		/* errors are recorded by _next_state() itself (slot state + clash) */
		(void)_next_state(brick, pos);
	}
}
/* Kernel thread driving the copy.
 *
 * Runs _run_copy() whenever copy_end is positive, then sleeps (at most
 * 20 seconds) until triggered, until copy_start / copy_end change, or
 * until the thread is asked to stop. Before terminating, it waits up
 * to 300 seconds for in-flight copy IO to drain and reports led_off.
 *
 * Always returns 0.
 */
static int _copy_thread(void *data)
{
	struct copy_brick *brick = data;

	MARS_DBG("--------------- copy_thread %p starting\n", brick);
	mars_power_led_on((void*)brick, true);
	brick->trigger = true;

	for (;;) {
		loff_t seen_start;
		loff_t seen_end;

		if (kthread_should_stop())
			break;

		seen_start = brick->copy_start;
		seen_end = brick->copy_end;
		if (seen_end > 0)
			_run_copy(brick);

		wait_event_interruptible_timeout(brick->event,
						 brick->trigger || brick->copy_start != seen_start || brick->copy_end != seen_end || kthread_should_stop(),
						 20 * HZ);
		brick->trigger = false;
	}

	MARS_DBG("--------------- copy_thread terminating\n");
	wait_event_interruptible_timeout(brick->event, !atomic_read(&brick->copy_flight), 300 * HZ);
	mars_power_led_off((void*)brick, true);
	MARS_DBG("--------------- copy_thread done.\n");
	return 0;
}
////////////////// own brick / input / output operations //////////////////
/* Forward the info query to the brick connected at INPUT_B_IO and
 * return its result.
 */
static int copy_get_info(struct copy_output *output, struct mars_info *info)
{
	struct copy_input *target = output->brick->inputs[INPUT_B_IO];
	int status = GENERIC_INPUT_CALL(target, mars_get_info, info);

	return status;
}
/* Forward mref_get to the input chosen by _determine_input().
 * On success the request is counted in brick->io_flight.
 * Returns the status of the forwarded call.
 */
static int copy_ref_get(struct copy_output *output, struct mref_object *mref)
{
	int which = _determine_input(output->brick, mref);
	struct copy_input *target = output->brick->inputs[which];
	int status = GENERIC_INPUT_CALL(target, mref_get, mref);

	if (status < 0)
		return status;

	atomic_inc(&output->brick->io_flight);
	return status;
}
/* Forward mref_put to the input chosen by _determine_input() and
 * uncount the request. When no regular IO remains in flight, the copy
 * thread is triggered.
 *
 * NOTE(review): the input is determined anew here; verify that it is
 * always the same one copy_ref_get() used for this mref.
 */
static void copy_ref_put(struct copy_output *output, struct mref_object *mref)
{
	int which = _determine_input(output->brick, mref);
	struct copy_input *target = output->brick->inputs[which];

	GENERIC_INPUT_CALL(target, mref_put, mref);

	if (!atomic_dec_and_test(&output->brick->io_flight))
		return;

	output->brick->trigger = true;
	wake_up_interruptible(&output->brick->event);
}
/* Forward mref_io to the input chosen by _determine_input(). */
static void copy_ref_io(struct copy_output *output, struct mref_object *mref)
{
	int which = _determine_input(output->brick, mref);
	struct copy_input *target = output->brick->inputs[which];

	GENERIC_INPUT_CALL(target, mref_io, mref);
}
/* Power switch of the copy brick.
 *
 * Button on:  start the copy thread unless one exists already. The
 *             thread reports led_on itself once it runs.
 * Button off: ask the copy thread to stop (without waiting) and forget
 *             it. The thread reports led_off itself when done.
 *
 * Always returns 0; failure to start the thread is logged and
 * reported via led_off.
 */
static int copy_switch(struct copy_brick *brick)
{
	static int version = 0;

	MARS_DBG("power.button = %d\n", brick->power.button);
	if (brick->power.button) {
		mars_power_led_off((void*)brick, false);
		if (!brick->thread) {
			struct task_struct *thread;

			thread = kthread_create(_copy_thread, brick, "mars_copy%d", version++);
			/* kthread_create() reports failure as ERR_PTR(), never as NULL */
			if (IS_ERR(thread)) {
				mars_power_led_off((void*)brick, true);
				MARS_ERR("could not start copy thread\n");
			} else {
				brick->thread = thread;
				/* keep the task_struct alive until put in the off path */
				get_task_struct(brick->thread);
				brick->trigger = true;
				wake_up_process(brick->thread);
			}
		}
	} else {
		mars_power_led_on((void*)brick, false);
		if (brick->thread) {
			kthread_stop_nowait(brick->thread);
			put_task_struct(brick->thread);
			brick->thread = NULL;
			wake_up_interruptible(&brick->event);
		}
	}
	return 0;
}
//////////////// object / aspect constructors / destructors ///////////////
/* Aspect constructor: nothing to initialise. Always returns 0. */
static int copy_mref_aspect_init_fn(struct generic_aspect *_ini, void *_init_data)
{
struct copy_mref_aspect *ini = (void*)_ini;
(void)ini;
return 0;
}
/* Aspect destructor: nothing to release. */
static void copy_mref_aspect_exit_fn(struct generic_aspect *_ini, void *_init_data)
{
struct copy_mref_aspect *ini = (void*)_ini;
(void)ini;
}
/* NOTE(review): presumably generates the copy_* helpers used in this file
 * (e.g. copy_alloc_mref, copy_mref_get_aspect, copy_aspect_types) -- the macro is defined elsewhere.
 */
MARS_MAKE_STATICS(copy);
////////////////////// brick constructors / destructors ////////////////////
/* Brick constructor: set up the mutex (as a binary semaphore) and the
 * wait queue the copy thread sleeps on. Always returns 0.
 */
static int copy_brick_construct(struct copy_brick *brick)
{
	sema_init(&brick->mutex, 1);
	init_waitqueue_head(&brick->event);

	return 0;
}
/* Brick destructor: nothing to release. Always returns 0. */
static int copy_brick_destruct(struct copy_brick *brick)
{
return 0;
}
/* Output constructor: no output-level state. Always returns 0. */
static int copy_output_construct(struct copy_output *output)
{
return 0;
}
/* Output destructor: nothing to release. Always returns 0. */
static int copy_output_destruct(struct copy_output *output)
{
return 0;
}
///////////////////////// static structs ////////////////////////
/* Brick-level operations: only the power switch is implemented. */
static struct copy_brick_ops copy_brick_ops = {
.brick_switch = copy_switch,
};
/* Operations offered on the single output of a copy brick. */
static struct copy_output_ops copy_output_ops = {
.make_object_layout = copy_make_object_layout,
.mars_get_info = copy_get_info,
.mref_get = copy_ref_get,
.mref_put = copy_ref_put,
.mref_io = copy_ref_io,
};
const struct copy_input_type copy_input_type = {
.type_name = "copy_input",
.input_size = sizeof(struct copy_input),
};
/* One entry per input; the slots are indexed by the INPUT_* constants. */
static const struct copy_input_type *copy_input_types[] = {
&copy_input_type,
&copy_input_type,
&copy_input_type,
&copy_input_type,
};
/* Output type descriptor, wiring constructor, destructor and operations. */
const struct copy_output_type copy_output_type = {
.type_name = "copy_output",
.output_size = sizeof(struct copy_output),
.master_ops = &copy_output_ops,
.output_construct = &copy_output_construct,
.output_destruct = &copy_output_destruct,
.aspect_types = copy_aspect_types,
.layout_code = {
[BRICK_OBJ_MREF] = LAYOUT_ALL,
}
};
static const struct copy_output_type *copy_output_types[] = {
&copy_output_type,
};
/* Brick type descriptor: four inputs, one output. Exported for other modules. */
const struct copy_brick_type copy_brick_type = {
.type_name = "copy_brick",
.brick_size = sizeof(struct copy_brick),
.max_inputs = 4,
.max_outputs = 1,
.master_ops = &copy_brick_ops,
.default_input_types = copy_input_types,
.default_output_types = copy_output_types,
.brick_construct = &copy_brick_construct,
.brick_destruct = &copy_brick_destruct,
};
EXPORT_SYMBOL_GPL(copy_brick_type);
////////////////// module init stuff /////////////////////////
/* Module entry point: register the copy brick type; returns the registration status. */
static int __init init_copy(void)
{
MARS_INF("init_copy()\n");
return copy_register_brick_type();
}
/* Module exit point: unregister the copy brick type. */
static void __exit exit_copy(void)
{
MARS_INF("exit_copy()\n");
copy_unregister_brick_type();
}
/* Module metadata and init/exit hooks. */
MODULE_DESCRIPTION("MARS copy brick");
MODULE_AUTHOR("Thomas Schoebel-Theuer <tst@1und1.de>");
MODULE_LICENSE("GPL");
module_init(init_copy);
module_exit(exit_copy);

62
mars_copy.h Normal file
View File

@ -0,0 +1,62 @@
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
#ifndef MARS_COPY_H
#define MARS_COPY_H
#include <linux/wait.h>
#include <linux/semaphore.h>
/* Indices into copy_brick.inputs[]: A is the original, B the copy target.
 * The *_IO inputs carry regular requests arriving at the output,
 * the *_COPY inputs carry the requests generated by the copy logic.
 */
#define INPUT_A_IO 0
#define INPUT_A_COPY 1
#define INPUT_B_IO 2
#define INPUT_B_COPY 3
/* Number of table slots, i.e. of pages which can be in progress at the same time. */
#define MAX_COPY_PARA 512
/* Per-slot states of the copy state machine (negative values in
 * copy_brick.state[] denote errors).
 */
enum {
COPY_STATE_START = 0, // idle, reads not yet issued
COPY_STATE_READ1 = 1, // waiting for the read from A
COPY_STATE_READ2 = 2, // verify mode: waiting for the read from B first
COPY_STATE_WRITE, // waiting for the write to B
COPY_STATE_CLEANUP, // releasing the requests of this slot
};
/* Per-request state of the copy brick, filled in for copy requests. */
struct copy_mref_aspect {
GENERIC_ASPECT(mref);
struct copy_brick *brick; // owner, used by the completion callback
int queue; // 0 = request on input A, 1 = request on input B
};
/* State of a copy brick: copies the byte range [copy_start, copy_end)
 * from input A to input B, page by page, with up to MAX_COPY_PARA
 * pages in progress.
 */
struct copy_brick {
	MARS_BRICK(copy);
	// parameters
	volatile loff_t copy_start;	// everything below has been copied
	volatile loff_t copy_end;	// stop working if == 0
	loff_t copy_last;		// copy_start at the last progress report
	bool verify_mode;		// read both sides and write only on difference
	bool optimize_mode;		// allow serving reads below copy_start from B
	bool low_dirty;			// a write hit the region before copy_end
	// internal
	volatile bool trigger;		// wakeup condition of the copy thread
	volatile unsigned long clash;	// bit 0: restart of the copy window requested
	atomic_t io_flight;		// regular requests between mref_get and mref_put
	atomic_t copy_flight;		// copy requests submitted but not yet completed
	wait_queue_head_t event;	// the copy thread sleeps here
	struct semaphore mutex;
	struct task_struct *thread;	// copy thread, NULL when not started
	/* COPY_STATE_* or a negative error code per slot.
	 * Explicitly signed: plain char is unsigned on some architectures,
	 * which would turn stored error codes into positive values.
	 */
	signed char state[MAX_COPY_PARA];
	struct mref_object *table[MAX_COPY_PARA][2];	// finished requests per slot: [0] = A, [1] = B
	struct generic_object_layout mref_object_layout;
};
/* Input of a copy brick; it carries no members beyond the generic ones. */
struct copy_input {
MARS_INPUT(copy);
};
/* Output of a copy brick; it carries no members beyond the generic ones. */
struct copy_output {
MARS_OUTPUT(copy);
};
MARS_TYPES(copy);
#endif

View File

@ -2,7 +2,7 @@
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
//#define LOG
//#define IO_DEBUGGING
#include <linux/kernel.h>
#include <linux/module.h>
@ -44,7 +44,7 @@ static int device_aio_ref_get(struct device_aio_output *output, struct mref_obje
return -ENOMEM;
mref->ref_flags = 0;
mref_a->do_dealloc = true;
#if 1 // litter flags for testing
#if 0 // litter flags for testing
if (mref->ref_rw) {
static int random = 0;
if (!(random++ % 2))
@ -84,16 +84,14 @@ static void device_aio_ref_io(struct device_aio_output *output, struct mref_obje
goto done;
}
#ifdef LOG
MARS_INF("AIO rw=%d pos=%lld len=%d data=%p\n", mref->ref_rw, mref->ref_pos, mref->ref_len, mref->ref_data);
#endif
MARS_IO("AIO rw=%d pos=%lld len=%d data=%p\n", mref->ref_rw, mref->ref_pos, mref->ref_len, mref->ref_data);
mref_a = device_aio_mref_get_aspect(output, mref);
traced_lock(&tinfo->lock, flags);
list_add_tail(&mref_a->io_head, &tinfo->mref_list);
traced_unlock(&tinfo->lock, flags);
wake_up(&tinfo->event);
wake_up_interruptible(&tinfo->event);
return;
done:
@ -129,6 +127,22 @@ static int device_aio_submit(struct device_aio_output *output, struct device_aio
return res;
}
/* Submit one zero-initialised iocb to the AIO context of this output.
 * The address limit is switched via set_fs() around the syscall because
 * the arguments live in kernel space.
 * NOTE(review): presumably meant to wake the event thread out of
 * sys_io_getevents() during shutdown -- confirm.
 * Returns the result of sys_io_submit().
 */
static int device_aio_submit_dummy(struct device_aio_output *output)
{
mm_segment_t oldfs;
int res;
struct iocb iocb = {
};
struct iocb *iocbp = &iocb;
oldfs = get_fs();
set_fs(get_ds());
res = sys_io_submit(output->ctxp, 1, &iocbp);
set_fs(oldfs);
return res;
}
static int device_aio_submit_thread(void *data)
{
struct aio_threadinfo *tinfo = data;
@ -137,7 +151,7 @@ static int device_aio_submit_thread(void *data)
/* TODO: this is provisionary. We only need it for sys_io_submit().
* The latter should be accompanied by a future vfs_submit() or
* do_sumbmit() which currently does not exist :(
* do_submit() which currently does not exist :(
* FIXME: corresponding cleanup NYI
*/
err = get_unused_fd();
@ -150,11 +164,15 @@ static int device_aio_submit_thread(void *data)
MARS_INF("kthread has started.\n");
//set_user_nice(current, -20);
#if 0
fake_mm();
#else
MARS_INF("old mm = %p\n", current->mm);
use_mm(tinfo->mm);
MARS_INF("new mm = %p\n", current->mm);
if (!current->mm)
return 0;
#endif
while (!kthread_should_stop()) {
struct list_head *tmp = NULL;
@ -205,6 +223,7 @@ static int device_aio_submit_thread(void *data)
unuse_mm(tinfo->mm);
MARS_INF("kthread has stopped.\n");
tinfo->terminated = true;
return 0;
}
@ -229,12 +248,15 @@ static int device_aio_event_thread(void *data)
int bounced;
int i;
struct timespec timeout = {
.tv_sec = 30,
.tv_sec = 10,
};
struct io_event events[MARS_MAX_AIO_READ];
oldfs = get_fs();
set_fs(get_ds());
/* TODO: don't timeout upon termination.
* Probably we should submit a dummy request.
*/
count = sys_io_getevents(output->ctxp, 1, MARS_MAX_AIO_READ, events, &timeout);
set_fs(oldfs);
@ -242,13 +264,17 @@ static int device_aio_event_thread(void *data)
bounced = 0;
for (i = 0; i < count; i++) {
struct device_aio_mref_aspect *mref_a = (void*)events[i].data;
struct mref_object *mref = mref_a->object;
struct generic_callback *cb = mref->ref_cb;
struct mref_object *mref;
struct generic_callback *cb;
int err = events[i].res;
#ifdef LOG
MARS_INF("AIO done %p pos = %lld len = %d rw = %d\n", mref, mref->ref_pos, mref->ref_len, mref->ref_rw);
#endif
if (!mref_a) {
continue; // this was a dummy request
}
mref = mref_a->object;
cb = mref->ref_cb;
MARS_IO("AIO done %p pos = %lld len = %d rw = %d\n", mref, mref->ref_pos, mref->ref_len, mref->ref_rw);
if (output->o_fdsync
&& err >= 0
@ -277,12 +303,13 @@ static int device_aio_event_thread(void *data)
device_aio_ref_put(output, mref);
}
if (bounced)
wake_up(&other->event);
wake_up_interruptible(&other->event);
}
unuse_mm(tinfo->mm);
MARS_INF("kthread has stopped.\n");
tinfo->terminated = true;
return 0;
}
@ -294,7 +321,7 @@ static int device_aio_sync_thread(void *data)
struct device_aio_output *output = tinfo->output;
struct file *file = output->filp;
MARS_INF("kthread has started.\n");
MARS_INF("kthread has started on '%s'.\n", output->brick->brick_name);
//set_user_nice(current, -20);
while (!kthread_should_stop()) {
@ -339,13 +366,18 @@ static int device_aio_sync_thread(void *data)
}
MARS_INF("kthread has stopped.\n");
tinfo->terminated = true;
return 0;
}
/* Report the current size of the backing file and the file itself.
 * Returns 0, or -EINVAL when no file (or no inode behind it) is available.
 */
static int device_aio_get_info(struct device_aio_output *output, struct mars_info *info)
{
struct file *file = output->filp;
if (unlikely(!file || !file->f_mapping || !file->f_mapping->host))
return -EINVAL;
info->current_size = i_size_read(file->f_mapping->host);
MARS_DBG("determined file size = %lld\n", info->current_size);
info->backing_file = file;
return 0;
}
@ -374,23 +406,27 @@ static int device_aio_brick_construct(struct device_aio_brick *brick)
return 0;
}
static int device_aio_switch(struct device_aio_brick *brick, bool state)
static int device_aio_switch(struct device_aio_brick *brick)
{
static int index = 0;
struct device_aio_output *output = brick->outputs[0];
char *path = output->output_name;
const char *path = output->output_name;
int flags = O_CREAT | O_RDWR | O_LARGEFILE;
int prot = 0600;
mm_segment_t oldfs;
int i;
int err = 0;
MARS_DBG("power.button = %d\n", brick->power.button);
if (!brick->power.button)
goto cleanup;
mars_power_led_off((void*)brick, false);
if (output->o_direct) {
flags |= O_DIRECT;
MARS_INF("using O_DIRECT on %s\n", path);
}
if (!state)
goto cleanup;
oldfs = get_fs();
set_fs(get_ds());
@ -403,9 +439,14 @@ static int device_aio_switch(struct device_aio_brick *brick, bool state)
output->filp = NULL;
return err;
}
MARS_DBG("opened file '%s'\n", path);
if (!output->ctxp) {
MARS_INF("mm = %p\n", current->mm);
if (!current->mm) {
MARS_ERR("mm = %p\n", current->mm);
err = -EINVAL;
goto err;
}
oldfs = get_fs();
set_fs(get_ds());
err = sys_io_setup(MARS_MAX_AIO, &output->ctxp);
@ -426,6 +467,7 @@ static int device_aio_switch(struct device_aio_brick *brick, bool state)
tinfo->mm = current->mm;
spin_lock_init(&tinfo->lock);
init_waitqueue_head(&tinfo->event);
tinfo->terminated = false;
tinfo->thread = kthread_create(fn[i], tinfo, "mars_aio%d", index++);
if (IS_ERR(tinfo->thread)) {
err = PTR_ERR(tinfo->thread);
@ -437,26 +479,50 @@ static int device_aio_switch(struct device_aio_brick *brick, bool state)
}
MARS_INF("opened file '%s'\n", path);
mars_power_led_on((void*)brick, true);
MARS_DBG("successfully switched on.\n");
return 0;
err:
MARS_ERR("status = %d\n", err);
cleanup:
for (i = 0; i < 2; i++) {
mars_power_led_on((void*)brick, false);
for (i = 0; i < 3; i++) {
struct aio_threadinfo *tinfo = &output->tinfo[i];
if (tinfo->thread) {
kthread_stop(tinfo->thread);
// FIXME: wait for termination
tinfo->thread = NULL;
}
}
if (output->ctxp) {
//...
device_aio_submit_dummy(output);
for (i = 0; i < 3; i++) {
struct aio_threadinfo *tinfo = &output->tinfo[i];
if (tinfo->thread) {
// wait for termination
wait_event_interruptible_timeout(
tinfo->event,
tinfo->terminated, 30 * HZ);
if (tinfo->terminated)
tinfo->thread = NULL;
}
}
if (output->filp) {
filp_close(output->filp, NULL);
output->filp = NULL;
mars_power_led_off((void*)brick,
(output->tinfo[0].thread == NULL &&
output->tinfo[1].thread == NULL &&
output->tinfo[2].thread == NULL));
if (brick->power.led_off) {
if (output->filp) {
filp_close(output->filp, NULL);
output->filp = NULL;
}
if (output->ctxp) {
#if 0 // FIXME this crashes
sys_io_destroy(output->ctxp);
#endif
output->ctxp = 0;
}
}
MARS_DBG("switch off status = %d\n", err);
return err;
}
@ -467,7 +533,8 @@ static int device_aio_output_construct(struct device_aio_output *output)
static int device_aio_output_destruct(struct device_aio_output *output)
{
return device_aio_switch(output->brick, false);
mars_power_button((void*)output->brick, false);
return device_aio_switch(output->brick);
}
///////////////////////// static structs ////////////////////////

View File

@ -27,6 +27,7 @@ struct aio_threadinfo {
struct mm_struct *mm;
wait_queue_head_t event;
spinlock_t lock;
bool terminated;
};
struct device_aio_output {

View File

@ -282,8 +282,6 @@ static void device_sio_mars_queue(struct device_sio_output *output, struct mref_
struct generic_callback *cb = mref->ref_cb;
unsigned long flags;
atomic_inc(&mref->ref_count);
if (mref->ref_rw == READ) {
traced_lock(&output->g_lock, flags);
index = output->index++;
@ -297,6 +295,9 @@ static void device_sio_mars_queue(struct device_sio_output *output, struct mref_
cb->cb_fn(cb);
return;
}
atomic_inc(&mref->ref_count);
tinfo = &output->tinfo[index];
MARS_DBG("queueing %p on %d\n", mref, index);
@ -304,7 +305,7 @@ static void device_sio_mars_queue(struct device_sio_output *output, struct mref_
list_add_tail(&mref_a->io_head, &tinfo->mref_list);
traced_unlock(&tinfo->lock, flags);
wake_up(&tinfo->event);
wake_up_interruptible(&tinfo->event);
}
static int device_sio_thread(void *data)
@ -407,10 +408,10 @@ static int device_sio_brick_construct(struct device_sio_brick *brick)
return 0;
}
static int device_sio_switch(struct device_sio_brick *brick, bool state)
static int device_sio_switch(struct device_sio_brick *brick)
{
struct device_sio_output *output = brick->outputs[0];
char *path = output->output_name;
const char *path = output->output_name;
int flags = O_CREAT | O_RDWR | O_LARGEFILE;
int prot = 0600;
mm_segment_t oldfs;
@ -419,7 +420,8 @@ static int device_sio_switch(struct device_sio_brick *brick, bool state)
flags |= O_DIRECT;
MARS_INF("using O_DIRECT on %s\n", path);
}
if (state) {
if (brick->power.button) {
mars_power_led_off((void*)brick, false);
oldfs = get_fs();
set_fs(get_ds());
output->filp = filp_open(path, flags, prot);
@ -439,8 +441,11 @@ static int device_sio_switch(struct device_sio_brick *brick, bool state)
}
#endif
MARS_INF("opened file '%s'\n", path);
mars_power_led_on((void*)brick, true);
} else {
mars_power_led_on((void*)brick, false);
// TODO: close etc...
mars_power_led_off((void*)brick, true);
}
return 0;
}

View File

@ -43,13 +43,28 @@ static void dummy_ref_io(struct dummy_output *output, struct mref_object *mref)
GENERIC_INPUT_CALL(input, mref_io, mref);
}
/* Brick switch callback for the dummy brick.
 *
 * Follows the requested state in brick->power.button: the LED of the
 * state being left is cleared first, then the LED of the state being
 * entered is set. The dummy brick has no real work to do in between.
 * Always returns 0.
 */
static int dummy_switch(struct dummy_brick *brick)
{
	if (!brick->power.button) {
		// requested: off
		mars_power_led_on((void*)brick, false);
		//...
		mars_power_led_off((void*)brick, true);
		return 0;
	}

	// requested: on
	mars_power_led_off((void*)brick, false);
	//...
	mars_power_led_on((void*)brick, true);
	return 0;
}
//////////////// object / aspect constructors / destructors ///////////////
static int dummy_mref_aspect_init_fn(struct generic_aspect *_ini, void *_init_data)
{
struct dummy_mref_aspect *ini = (void*)_ini;
(void)ini;
ini->my_own = 0;
//ini->my_own = 0;
return 0;
}
@ -65,19 +80,30 @@ MARS_MAKE_STATICS(dummy);
static int dummy_brick_construct(struct dummy_brick *brick)
{
brick->my_own = 0;
//brick->my_own = 0;
return 0;
}
/* Brick destructor for the dummy brick: nothing to release, always returns 0. */
static int dummy_brick_destruct(struct dummy_brick *brick)
{
return 0;
}
static int dummy_output_construct(struct dummy_output *output)
{
output->my_own = 0;
//output->my_own = 0;
return 0;
}
/* Output destructor for the dummy brick: nothing to release, always returns 0. */
static int dummy_output_destruct(struct dummy_output *output)
{
return 0;
}
///////////////////////// static structs ////////////////////////
static struct dummy_brick_ops dummy_brick_ops = {
.brick_switch = dummy_switch,
};
static struct dummy_output_ops dummy_output_ops = {
@ -102,6 +128,7 @@ const struct dummy_output_type dummy_output_type = {
.output_size = sizeof(struct dummy_output),
.master_ops = &dummy_output_ops,
.output_construct = &dummy_output_construct,
.output_destruct = &dummy_output_destruct,
.aspect_types = dummy_aspect_types,
.layout_code = {
[BRICK_OBJ_MREF] = LAYOUT_ALL,
@ -121,6 +148,7 @@ const struct dummy_brick_type dummy_brick_type = {
.default_input_types = dummy_input_types,
.default_output_types = dummy_output_types,
.brick_construct = &dummy_brick_construct,
.brick_destruct = &dummy_brick_destruct,
};
EXPORT_SYMBOL_GPL(dummy_brick_type);
@ -128,13 +156,13 @@ EXPORT_SYMBOL_GPL(dummy_brick_type);
static int __init init_dummy(void)
{
printk(MARS_INFO "init_dummy()\n");
MARS_INF("init_dummy()\n");
return dummy_register_brick_type();
}
static void __exit exit_dummy(void)
{
printk(MARS_INFO "exit_dummy()\n");
MARS_INF("exit_dummy()\n");
dummy_unregister_brick_type();
}

View File

@ -1,12 +1,77 @@
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/utsname.h>
#define _STRATEGY
#include "mars.h"
#include <linux/syscalls.h>
#include <linux/namei.h>
#include <linux/kthread.h>
// some helpers
/* Create the directory @path with mode 0700 from kernel context.
 *
 * The address limit is temporarily widened via set_fs(get_ds()) so that
 * sys_mkdir() accepts a kernel-space string, and restored afterwards.
 * Returns the status of sys_mkdir().
 */
int mars_mkdir(const char *path)
{
	mm_segment_t saved_fs = get_fs();
	int res;

	set_fs(get_ds());
	res = sys_mkdir(path, 0700);
	set_fs(saved_fs);

	return res;
}
EXPORT_SYMBOL_GPL(mars_mkdir);
int mars_symlink(const char *oldpath, const char *newpath, const struct timespec *stamp)
{
int newlen = strlen(newpath);
char tmp[newlen + 16];
mm_segment_t oldfs;
int status;
snprintf(tmp, sizeof(tmp), "%s.tmp", newpath);
oldfs = get_fs();
set_fs(get_ds());
(void)sys_unlink(tmp);
status = sys_symlink(oldpath, tmp);
set_fs(oldfs);
// TODO NYI: set timestamp
if (status >= 0) {
status = mars_rename(tmp, newpath);
}
return status;
}
EXPORT_SYMBOL_GPL(mars_symlink);
/* Rename @oldpath to @newpath from kernel context.
 *
 * Wraps sys_rename() with a temporarily widened address limit so that
 * kernel-space strings are accepted. Returns the status of sys_rename().
 */
int mars_rename(const char *oldpath, const char *newpath)
{
	mm_segment_t saved_fs = get_fs();
	int res;

	set_fs(get_ds());
	res = sys_rename(oldpath, newpath);
	set_fs(saved_fs);

	return res;
}
EXPORT_SYMBOL_GPL(mars_rename);
//////////////////////////////////////////////////////////////
// object stuff
@ -22,18 +87,615 @@ EXPORT_SYMBOL_GPL(mref_type);
// brick stuff
//////////////////////////////////////////////////////////////
// infrastructure
static char *id = NULL;
/* TODO: better use MAC addresses (or motherboard IDs where available).
* Or, at least, some checks for MAC addresses should be recorded / added.
* When the nodename is misconfigured, data might be scrambled.
* MAC addresses should be more secure.
* In ideal case, further checks should be added to prohibit accidental
* name clashes.
*/
char *my_id(void)
{
struct new_utsname *u;
if (id)
return id;
//down_read(&uts_sem); // FIXME: this is currenty not EXPORTed from the kernel!
u = utsname();
if (u) {
id = kstrdup(u->nodename, GFP_MARS);
}
//up_read(&uts_sem);
return id;
}
EXPORT_SYMBOL_GPL(my_id);
struct mars_global *mars_global = NULL;
EXPORT_SYMBOL_GPL(mars_global);
/* Wake up whoever waits on mars_global->main_event.
 *
 * Sets mars_global->main_trigger before waking the waiters.
 * Does nothing while mars_global is NULL.
 */
void mars_trigger(void)
{
	if (!mars_global)
		return;

	MARS_DBG("trigger...\n");
	mars_global->main_trigger = true;
	wake_up_interruptible(&mars_global->main_event);
}
EXPORT_SYMBOL_GPL(mars_trigger);
/* Set the power button (requested state) of @brick to @val.
 *
 * Only an actual change is applied: it is logged, stored via
 * set_button() and followed by mars_trigger().
 */
void mars_power_button(struct mars_brick *brick, bool val)
{
	bool previous = brick->power.button;

	if (previous == val)
		return;

	MARS_DBG("brick '%s' type '%s' power button %d -> %d\n", brick->brick_path, brick->type->type_name, previous, val);
	set_button(&brick->power, val);
	mars_trigger();
}
EXPORT_SYMBOL_GPL(mars_power_button);
/* Set the "on" LED of @brick to @val.
 *
 * Only an actual change is applied: it is logged, stored via
 * set_led_on() and followed by mars_trigger().
 */
void mars_power_led_on(struct mars_brick *brick, bool val)
{
	bool previous = brick->power.led_on;

	if (previous == val)
		return;

	MARS_DBG("brick '%s' type '%s' led_on %d -> %d\n", brick->brick_path, brick->type->type_name, previous, val);
	set_led_on(&brick->power, val);
	mars_trigger();
}
EXPORT_SYMBOL_GPL(mars_power_led_on);
/* Set the "off" LED of @brick to @val.
 *
 * Only an actual change is applied: it is logged, stored via
 * set_led_off() and followed by mars_trigger().
 */
void mars_power_led_off(struct mars_brick *brick, bool val)
{
	bool previous = brick->power.led_off;

	if (previous == val)
		return;

	MARS_DBG("brick '%s' type '%s' led_off %d -> %d\n", brick->brick_path, brick->type->type_name, previous, val);
	set_led_off(&brick->power, val);
	mars_trigger();
}
EXPORT_SYMBOL_GPL(mars_power_led_off);
/////////////////////////////////////////////////////////////////////
// strategy layer
/* Per-directory scan context, handed to mars_filler() as the opaque
 * buffer argument of vfs_readdir().
 */
struct mars_cookie {
struct mars_global *global; // owner of the dent list (dent_anchor) and its mutex
mars_dent_checker checker; // classifies each directory entry; a negative result skips it
char *path; // path of the directory being scanned
void *parent; // stored as d_parent in every dent created during this scan
int pathlen; // length of path, without the terminating NUL
int allocsize; // number of bytes allocated for each new dent
int depth; // stored as d_depth in every dent created during this scan
};
/* Refresh the inode information of @dent from the filesystem object
 * at @newpath (a final symlink is not followed).
 *
 * The previous new_stat is saved to old_stat before new_stat is refilled.
 * For symlinks the target is read as well: when it differs from the
 * currently stored new_link, the old new_link is moved to old_link
 * (freeing the former old_link) and the fresh target becomes new_link.
 *
 * Returns the lookup error if the path cannot be resolved, -ENOMEM if
 * the link buffer cannot be allocated, the result of ->readlink() for
 * symlinks (a non-negative byte count on success), and 0 otherwise.
 */
static
int get_inode(char *newpath, struct mars_dent *dent)
{
	mm_segment_t oldfs;
	int status;
	struct path path;

	oldfs = get_fs();
	set_fs(get_ds());

	status = user_path_at(AT_FDCWD, newpath, 0, &path);
	if (!status) {
		struct inode *inode = path.dentry->d_inode;

		memcpy(&dent->old_stat, &dent->new_stat, sizeof(dent->old_stat));
		generic_fillattr(inode, &dent->new_stat);

		if (S_ISLNK(dent->new_stat.mode)) {
			int len = dent->new_stat.size;
			char *link;

			status = -ENOMEM;
			link = kmalloc(len + 1, GFP_MARS);
			if (link) {
				status = inode->i_op->readlink(path.dentry, link, len);
				link[len] = '\0';
				/* Terminate at the number of bytes actually
				 * delivered, so no uninitialized tail is
				 * treated as part of the target.
				 */
				if (status >= 0 && status < len)
					link[status] = '\0';
				/* Compare the full strings: a length-limited
				 * compare would treat a new target that is a
				 * mere prefix of the old one as "unchanged".
				 */
				if (status < 0 ||
				    (dent->new_link && !strcmp(dent->new_link, link))) {
					MARS_DBG("symlink free '%s' (%s) status = %d\n", link, dent->new_link ? dent->new_link : "", status);
					kfree(link);
				} else {
					MARS_DBG("symlink new '%s' (%s) status = %d\n", link, dent->new_link ? dent->new_link : "", status);
					if (dent->old_link)
						kfree(dent->old_link);
					dent->old_link = dent->new_link;
					dent->new_link = link;
				}
			}
		}
		path_put(&path);
	}

	set_fs(oldfs);

	if (dent->new_link)
		MARS_IO("symlink '%s'\n", dent->new_link);
	return status;
}
/* Directory-entry callback for vfs_readdir(); @__buf is a struct mars_cookie.
 *
 * Entries starting with '.' and entries rejected by the checker
 * (negative class) are ignored. For every other entry which is not yet
 * present in global->dent_anchor (compared by full path), a new
 * mars_dent is allocated and inserted such that the list stays sorted
 * by (d_class, d_serial, d_path).
 *
 * Returns 0 on success or when the entry is skipped, -ENOMEM when an
 * allocation fails.
 *
 * NOTE(review): the search for the insertion point runs before
 * global->mutex is taken; only the insertion itself is protected.
 */
static
int mars_filler(void *__buf, const char *name, int namlen, loff_t offset,
		u64 ino, unsigned int d_type)
{
	struct mars_cookie *cookie = __buf;
	struct mars_global *global = cookie->global;
	struct list_head *anchor = &global->dent_anchor;
	struct mars_dent *dent;
	struct list_head *tmp;
	struct mars_dent *best = NULL;
	char *newpath;
	int prefix = 0;
	int pathlen;
	int class;
	int serial = 0;

	MARS_IO("ino = %llu len = %d offset = %lld type = %u\n", ino, namlen, offset, d_type);

	if (name[0] == '.') {
		return 0;
	}

	class = cookie->checker(cookie->path, name, namlen, d_type, &prefix, &serial);
	if (class < 0)
		return 0;

	// build "<dir>/<name>"
	pathlen = cookie->pathlen;
	newpath = kmalloc(pathlen + namlen + 2, GFP_MARS);
	if (unlikely(!newpath))
		goto err_mem0;
	memcpy(newpath, cookie->path, pathlen);
	newpath[pathlen++] = '/';
	memcpy(newpath + pathlen, name, namlen);
	pathlen += namlen;
	newpath[pathlen] = '\0';

	MARS_IO("path = '%s'\n", newpath);

	for (tmp = anchor->next; tmp != anchor; tmp = tmp->next) {
		int cmp;
		dent = container_of(tmp, struct mars_dent, sub_link);
		cmp = strcmp(dent->d_path, newpath);
		if (!cmp) {
			// already known: nothing to do
			kfree(newpath);
			return 0;
		}
		// keep the list sorted. find the next smallest member.
		// i.e. the greatest existing member which orders before the new one.
		if ((dent->d_class < class ||
		     (dent->d_class == class &&
		      (dent->d_serial < serial ||
		       (dent->d_serial == serial &&
			cmp < 0))))
		    &&
		    (!best ||
		     best->d_class < dent->d_class ||
		     (best->d_class == dent->d_class &&
		      (best->d_serial < dent->d_serial ||
		       (best->d_serial == dent->d_serial &&
			strcmp(best->d_path, dent->d_path) < 0))))) {
			best = dent;
		}
	}

	dent = kzalloc(cookie->allocsize, GFP_MARS);
	if (unlikely(!dent))
		goto err_mem1;

	dent->d_name = kmalloc(namlen + 1, GFP_MARS);
	if (unlikely(!dent->d_name))
		goto err_mem2;

	dent->d_type = d_type;
	dent->d_class = class;
	dent->d_serial = serial;
	dent->d_parent = cookie->parent;
	dent->d_depth = cookie->depth;
	memcpy(dent->d_name, name, namlen);
	dent->d_name[namlen] = '\0';
	dent->d_namelen = namlen;
	dent->d_rest = dent->d_name + prefix;
	dent->d_path = newpath; // ownership of newpath moves to the dent
	dent->d_pathlen = pathlen;

	down(&global->mutex);
	if (best) {
		// insert directly behind its predecessor
		list_add(&dent->sub_link, &best->sub_link);
	} else {
		/* No existing member orders before the new one, so it
		 * belongs at the front. (Appending at the tail would
		 * break the sort order whenever the list is non-empty.)
		 */
		list_add(&dent->sub_link, anchor);
	}
	up(&global->mutex);
	return 0;

err_mem2:
	kfree(dent);
err_mem1:
	kfree(newpath);
err_mem0:
	return -ENOMEM;
}
/* Scan the single directory cookie->path and feed every entry to
 * mars_filler().
 *
 * Returns the error from filp_open() if the directory cannot be
 * opened, otherwise the last status of vfs_readdir() (which is <= 0).
 */
static int _mars_dent_work(struct mars_cookie *cookie)
{
	mm_segment_t saved_fs = get_fs();
	struct file *dir;
	int res;

	set_fs(get_ds());
	dir = filp_open(cookie->path, O_DIRECTORY | O_RDONLY, 0);
	set_fs(saved_fs);

	if (unlikely(IS_ERR(dir)))
		return PTR_ERR(dir);

	// repeat for as long as vfs_readdir() reports a positive status
	do {
		res = vfs_readdir(dir, mars_filler, cookie);
		MARS_IO("vfs_readdir() status = %d\n", res);
	} while (res > 0);

	filp_close(dir, NULL);
	return res;
}
/* Scan the directory tree below @dirname into global->dent_anchor and
 * run @worker over all resulting dents.
 *
 * @allocsize: number of bytes to allocate per dent
 * @checker:   classifies directory entries (see mars_filler())
 * @worker:    called once per dent in a forward pass (third argument
 *             false) and once in a backward pass (third argument true);
 *             when NULL, only the top-level directory is scanned
 * @buf:       opaque first argument handed to @worker
 * @maxdepth:  directories with d_depth <= maxdepth are descended into
 *
 * Returns the bitwise OR of all status values collected on the way.
 */
int mars_dent_work(struct mars_global *global, char *dirname, int allocsize, mars_dent_checker checker, mars_dent_worker worker, void *buf, int maxdepth)
{
	static int version = 0;
	struct mars_cookie cookie = {
		.global = global,
		.checker = checker,
		.path = dirname,
		.pathlen = strlen(dirname),
		.allocsize = allocsize,
		.depth = 0,
	};
	struct list_head *tmp;
	int rounds = 0;
	int status;
	int total_status = 0;
	bool found_dir;

	// each invocation gets its own version stamp
	version++;

	total_status = _mars_dent_work(&cookie);
	if (total_status || !worker) {
		goto done;
	}

restart:
	found_dir = false;

	/* First, get all the inode information in a separate pass
	 * before starting work.
	 * The separate pass is necessary because some dents may
	 * forward-reference other dents, and it would be a pity if
	 * some inodes were not available or were outdated.
	 */
	for (tmp = global->dent_anchor.next; tmp != &global->dent_anchor; tmp = tmp->next) {
		struct mars_dent *dent = container_of(tmp, struct mars_dent, sub_link);
		// treat any member only once during this invocation
		if (dent->d_version == version)
			continue;
		dent->d_version = version;

		MARS_IO("reading inode '%s'\n", dent->d_path);
		status = get_inode(dent->d_path, dent);
		total_status |= status;

		// recurse into subdirectories by inserting into the flat list
		if (S_ISDIR(dent->new_stat.mode) && dent->d_depth <= maxdepth) {
			struct mars_cookie sub_cookie = {
				.global = global,
				.checker = checker,
				.path = dent->d_path,
				.pathlen = dent->d_pathlen,
				.allocsize = allocsize,
				.parent = dent,
				.depth = dent->d_depth + 1,
			};
			found_dir = true;
			status = _mars_dent_work(&sub_cookie);
			total_status |= status;
			if (status < 0) {
				MARS_ERR("forward: status %d on '%s'\n", status, dent->d_path);
			}
		}
	}

	// newly inserted members need their own inode pass (bounded number of rounds)
	if (found_dir && ++rounds < 10) {
		goto restart;
	}

	/* Forward pass.
	 */
	for (tmp = global->dent_anchor.next; tmp != &global->dent_anchor; tmp = tmp->next) {
		struct mars_dent *dent = container_of(tmp, struct mars_dent, sub_link);
		MARS_IO("forward treat '%s'\n", dent->d_path);
		status = worker(buf, dent, false);
		total_status |= status;
		// report the failure (this log was previously unreachable and mislabelled)
		if (status < 0) {
			MARS_ERR("forward: status %d on '%s'\n", status, dent->d_path);
		}
	}

	/* Backward pass.
	 */
	for (tmp = global->dent_anchor.prev; tmp != &global->dent_anchor; tmp = tmp->prev) {
		struct mars_dent *dent = container_of(tmp, struct mars_dent, sub_link);
		MARS_IO("backward treat '%s'\n", dent->d_path);
		status = worker(buf, dent, true);
		total_status |= status;
		if (status < 0) {
			MARS_ERR("backwards: status %d on '%s'\n", status, dent->d_path);
		}
	}

done:
	return total_status;
}
EXPORT_SYMBOL_GPL(mars_dent_work);
/* Look up the dent whose d_path equals @path in global->dent_anchor.
 *
 * Takes no lock itself; mars_find_dent() is the variant which holds
 * global->mutex around the lookup.
 * Returns the first match, or NULL when there is none.
 */
struct mars_dent *_mars_find_dent(struct mars_global *global, const char *path)
{
	struct list_head *anchor = &global->dent_anchor;
	struct list_head *pos;

	for (pos = anchor->next; pos != anchor; pos = pos->next) {
		struct mars_dent *cand = container_of(pos, struct mars_dent, sub_link);

		if (strcmp(cand->d_path, path) == 0)
			return cand;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(_mars_find_dent);
/* Locked variant of _mars_find_dent(): performs the lookup while
 * holding global->mutex.
 * Returns the matching dent, or NULL when there is none.
 */
struct mars_dent *mars_find_dent(struct mars_global *global, const char *path)
{
	struct mars_dent *found;

	down(&global->mutex);
	found = _mars_find_dent(global, path);
	up(&global->mutex);

	return found;
}
EXPORT_SYMBOL_GPL(mars_find_dent);
/* Unlink @dent from its list and release it together with all strings
 * and private data it owns.
 */
void mars_dent_free(struct mars_dent *dent)
{
	int idx;

	list_del(&dent->sub_link);

	// kfree() ignores NULL pointers, so unset members need no special casing
	for (idx = 0; idx < MARS_ARGV_MAX; idx++)
		kfree(dent->d_argv[idx]);
	kfree(dent->d_args);
	kfree(dent->d_private);
	kfree(dent->old_link);
	kfree(dent->new_link);
	kfree(dent->d_name);
	kfree(dent->d_path);
	kfree(dent);
}
EXPORT_SYMBOL_GPL(mars_dent_free);
/* Free every dent linked into @anchor, always taking the last member,
 * until the list is empty.
 */
void mars_dent_free_all(struct list_head *anchor)
{
	for (;;) {
		struct mars_dent *last;

		if (list_empty(anchor))
			break;
		last = container_of(anchor->prev, struct mars_dent, sub_link);
		// mars_dent_free() also unlinks the member from the list
		mars_dent_free(last);
	}
}
EXPORT_SYMBOL_GPL(mars_dent_free_all);
/* Find the brick registered under @path in global->brick_anchor.
 *
 * When @brick_type is non-NULL, the brick found must be of exactly
 * that type; otherwise an error is logged and NULL is returned.
 * Returns NULL as well when @global or @path is NULL, or when no brick
 * carries that path.
 */
struct mars_brick *mars_find_brick(struct mars_global *global, const void *brick_type, const char *path)
{
	struct mars_brick *found = NULL;
	struct list_head *pos;

	if (!global || !path)
		return NULL;

	// the list walk is protected by the mutex, the type check is not
	down(&global->mutex);
	for (pos = global->brick_anchor.next; pos != &global->brick_anchor; pos = pos->next) {
		struct mars_brick *cand = container_of(pos, struct mars_brick, brick_link);

		if (strcmp(cand->brick_path, path) == 0) {
			found = cand;
			break;
		}
	}
	up(&global->mutex);

	if (found && brick_type && found->type != brick_type) {
		MARS_ERR("bad brick type\n");
		return NULL;
	}
	return found;
}
EXPORT_SYMBOL_GPL(mars_find_brick);
/* Allocate and initialize a new brick of type @_brick_type and link it
 * into global->brick_anchor.
 *
 * @path is duplicated into res->brick_path; @_name is duplicated and
 * handed to generic_brick_init_full() via the names[] array.
 * The brick is only created here; switching it on is a separate step.
 *
 * Returns the new brick, or NULL on any failure (all intermediate
 * allocations are released in that case).
 */
struct mars_brick *mars_make_brick(struct mars_global *global, const void *_brick_type, const char *path, const char *_name)
{
const char *name = kstrdup(_name, GFP_MARS);
const char *names[] = { name };
const struct generic_brick_type *brick_type = _brick_type;
const struct generic_input_type **input_types;
const struct generic_output_type **output_types;
struct mars_brick *res;
int size;
int i;
int status;
if (!name) {
MARS_ERR("cannot allocate space for name\n");
return NULL;
}
// total size = brick itself + one pointer slot per input/output + all input/output structures
size = brick_type->brick_size +
(brick_type->max_inputs + brick_type->max_outputs) * sizeof(void*);
input_types = brick_type->default_input_types;
for (i = 0; i < brick_type->max_inputs; i++) {
const struct generic_input_type *type = *input_types++;
if (unlikely(!type)) {
MARS_ERR("input_type %d is missing\n", i);
goto err_name;
}
size += type->input_size;
}
output_types = brick_type->default_output_types;
for (i = 0; i < brick_type->max_outputs; i++) {
const struct generic_output_type *type = *output_types++;
if (unlikely(!type)) {
MARS_ERR("output_type %d is missing\n", i);
goto err_name;
}
size += type->output_size;
}
res = kzalloc(size, GFP_MARS);
if (!res) {
MARS_ERR("cannot grab %d bytes for brick type '%s'\n", size, brick_type->type_name);
goto err_name;
}
res->brick_path = kstrdup(path, GFP_MARS);
res->global = global;
if (!res->brick_path) {
MARS_ERR("cannot grab memory for path '%s'\n", path);
goto err_res;
}
// NOTE(review): names[] holds a single entry; presumably sufficient for the brick types used here -- confirm against generic_brick_init_full()
status = generic_brick_init_full(res, size, brick_type, NULL, NULL, names);
MARS_DBG("brick '%s' init '%s' '%s' (status=%d)\n", brick_type->type_name, path, name, status);
if (status < 0) {
MARS_ERR("cannot init brick %s\n", brick_type->type_name);
goto err_path;
}
/* Immediately make it visible, regardless of internal state.
* Switching on / etc must be done separately.
*/
down(&global->mutex);
list_add(&res->brick_link, &global->brick_anchor);
up(&global->mutex);
// success: 'name' is not freed here; presumably the brick keeps referencing it -- TODO confirm
return res;
// error unwinding, in reverse order of acquisition
err_path:
kfree(res->brick_path);
err_res:
kfree(res);
err_name:
kfree(name);
return NULL;
}
EXPORT_SYMBOL_GPL(mars_make_brick);
/////////////////////////////////////////////////////////////////////
// meta descriptions
/* Field description tables.
 * Each table lists the described members of one structure via
 * META_INI(member, struct, type) or, for embedded structures,
 * META_INI_SUB(member, struct, subtable), and ends with an empty {} entry.
 * NOTE(review): presumably consumed by the field-wise struct transfer in mars_net.c -- confirm.
 */
// described members of struct mars_info
const struct meta mars_info_meta[] = {
META_INI(current_size, struct mars_info, FIELD_INT),
META_INI(transfer_order, struct mars_info, FIELD_INT),
META_INI(transfer_size, struct mars_info, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_info_meta);
// described members of struct mref_object
const struct meta mars_mref_meta[] = {
META_INI(ref_pos, struct mref_object, FIELD_INT),
META_INI(ref_len, struct mref_object, FIELD_INT),
META_INI(ref_may_write, struct mref_object, FIELD_INT),
META_INI(ref_flags, struct mref_object, FIELD_INT),
META_INI(ref_rw, struct mref_object, FIELD_INT),
META_INI(ref_id, struct mref_object, FIELD_INT),
META_INI(_ref_cb.cb_error, struct mref_object, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_mref_meta);
// described members of struct timespec
const struct meta mars_timespec_meta[] = {
META_INI(tv_sec, struct timespec, FIELD_INT),
META_INI(tv_nsec, struct timespec, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_timespec_meta);
// described members of struct kstat; the three timestamps refer to mars_timespec_meta
const struct meta mars_kstat_meta[] = {
META_INI(ino, struct kstat, FIELD_INT),
META_INI(mode, struct kstat, FIELD_INT),
META_INI(size, struct kstat, FIELD_INT),
META_INI_SUB(atime, struct kstat, mars_timespec_meta),
META_INI_SUB(mtime, struct kstat, mars_timespec_meta),
META_INI_SUB(ctime, struct kstat, mars_timespec_meta),
META_INI(blksize, struct kstat, FIELD_INT),
{}
};
EXPORT_SYMBOL_GPL(mars_kstat_meta);
// described members of struct mars_dent; both stat members refer to mars_kstat_meta
const struct meta mars_dent_meta[] = {
META_INI(d_name, struct mars_dent, FIELD_STRING),
META_INI(d_rest, struct mars_dent, FIELD_STRING),
META_INI(d_path, struct mars_dent, FIELD_STRING),
META_INI(d_namelen, struct mars_dent, FIELD_INT),
META_INI(d_pathlen, struct mars_dent, FIELD_INT),
META_INI(d_type, struct mars_dent, FIELD_INT),
META_INI(d_class, struct mars_dent, FIELD_INT),
META_INI(d_version, struct mars_dent, FIELD_INT),
META_INI_SUB(new_stat,struct mars_dent, mars_kstat_meta),
META_INI_SUB(old_stat,struct mars_dent, mars_kstat_meta),
META_INI(new_link, struct mars_dent, FIELD_STRING),
META_INI(old_link, struct mars_dent, FIELD_STRING),
META_INI(d_args, struct mars_dent, FIELD_STRING),
META_INI(d_argv[0], struct mars_dent, FIELD_STRING),
META_INI(d_argv[1], struct mars_dent, FIELD_STRING),
META_INI(d_argv[2], struct mars_dent, FIELD_STRING),
META_INI(d_argv[3], struct mars_dent, FIELD_STRING),
{}
};
EXPORT_SYMBOL_GPL(mars_dent_meta);
/////////////////////////////////////////////////////////////////////
// init stuff
static int __init init_mars(void)
{
printk(MARS_INFO "init_mars()\n");
MARS_INF("init_mars()\n");
return 0;
}
static void __exit exit_mars(void)
{
printk(MARS_INFO "exit_mars()\n");
MARS_INF("exit_mars()\n");
if (id) {
kfree(id);
id = NULL;
}
}
MODULE_DESCRIPTION("MARS block storage");

View File

@ -153,10 +153,10 @@ static int if_device_make_request(struct request_queue *q, struct bio *bio)
bio->bi_check2 = 0;
bio->bi_check3 = 0;
/* THIS IS PROVISIONARY
/* FIXME: THIS IS PROVISIONARY (use event instead)
*/
while (unlikely(!brick->is_active)) {
msleep(100);
while (unlikely(!brick->power.led_on)) {
msleep(2 * HZ);
}
_CHECK_ATOMIC(&bio->bi_comp_cnt, !=, 0);
@ -369,7 +369,7 @@ static int if_device_brick_destruct(struct if_device_brick *brick)
return 0;
}
static int if_device_switch(struct if_device_brick *brick, bool state)
static int if_device_switch(struct if_device_brick *brick)
{
struct if_device_input *input = brick->inputs[0];
struct request_queue *q;
@ -379,71 +379,91 @@ static int if_device_switch(struct if_device_brick *brick, bool state)
unsigned long capacity;
int status;
//MARS_DBG("1\n");
status = GENERIC_INPUT_CALL(input, mars_get_info, &info);
if (status < 0) {
MARS_ERR("cannot get device info, status=%d\n", status);
return status;
}
capacity = info.current_size >> 9; // TODO: make this dynamic
if (brick->power.button) {
mars_power_led_off((void*)brick, false);
status = GENERIC_INPUT_CALL(input, mars_get_info, &info);
if (status < 0) {
MARS_ERR("cannot get device info, status=%d\n", status);
return status;
}
capacity = info.current_size >> 9; // TODO: make this dynamic
q = blk_alloc_queue(GFP_MARS);
if (!q) {
MARS_ERR("cannot allocate device request queue\n");
return -ENOMEM;
}
q->queuedata = input;
input->q = q;
//MARS_DBG("2\n");
disk = alloc_disk(1);
if (!disk) {
MARS_ERR("cannot allocate gendisk\n");
return -ENOMEM;
}
q = blk_alloc_queue(GFP_MARS);
if (!q) {
MARS_ERR("cannot allocate device request queue\n");
return -ENOMEM;
}
q->queuedata = input;
input->q = q;
//MARS_DBG("2\n");
disk = alloc_disk(1);
if (!disk) {
MARS_ERR("cannot allocate gendisk\n");
return -ENOMEM;
}
//MARS_DBG("3\n");
minor = device_minor++; //TODO: protect against races (e.g. atomic_t)
disk->queue = q;
disk->major = MARS_MAJOR; //TODO: make this dynamic for >256 devices
disk->first_minor = minor;
disk->fops = &if_device_blkdev_ops;
sprintf(disk->disk_name, "mars%d", minor);
MARS_DBG("created device name %s\n", disk->disk_name);
disk->private_data = input;
set_capacity(disk, capacity);
blk_queue_make_request(q, if_device_make_request);
blk_queue_max_segment_size(q, MARS_MAX_SEGMENT_SIZE);
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
q->unplug_fn = if_device_unplug;
sema_init(&input->kick_sem, 1);
spin_lock_init(&input->req_lock);
q->queue_lock = &input->req_lock; // needed!
//blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);//???
//MARS_DBG("4\n");
input->bdev = bdget(MKDEV(disk->major, minor));
/* we have no partitions. we contain only ourselves. */
input->bdev->bd_contains = input->bdev;
//MARS_DBG("3\n");
minor = device_minor++; //TODO: protect against races (e.g. atomic_t)
disk->queue = q;
disk->major = MARS_MAJOR; //TODO: make this dynamic for >256 devices
disk->first_minor = minor;
disk->fops = &if_device_blkdev_ops;
//snprintf(disk->disk_name, sizeof(disk->disk_name), "mars%d", minor);
snprintf(disk->disk_name, sizeof(disk->disk_name), "mars/%s", brick->brick_name);
MARS_DBG("created device name %s\n", disk->disk_name);
disk->private_data = input;
set_capacity(disk, capacity);
blk_queue_make_request(q, if_device_make_request);
blk_queue_max_segment_size(q, MARS_MAX_SEGMENT_SIZE);
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
q->unplug_fn = if_device_unplug;
sema_init(&input->kick_sem, 1);
spin_lock_init(&input->req_lock);
q->queue_lock = &input->req_lock; // needed!
//blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);//???
//MARS_DBG("4\n");
input->bdev = bdget(MKDEV(disk->major, minor));
/* we have no partitions. we contain only ourselves. */
input->bdev->bd_contains = input->bdev;
#if 0 // ???
q->backing_dev_info.congested_fn = mars_congested;
q->backing_dev_info.congested_data = input;
q->backing_dev_info.congested_fn = mars_congested;
q->backing_dev_info.congested_data = input;
#endif
#if 0 // ???
blk_queue_merge_bvec(q, mars_merge_bvec);
blk_queue_merge_bvec(q, mars_merge_bvec);
#endif
INIT_LIST_HEAD(&input->plug_anchor);
INIT_LIST_HEAD(&input->plug_anchor);
// point of no return
//MARS_DBG("99999\n");
add_disk(disk);
input->disk = disk;
//set_device_ro(input->bdev, 0); // TODO: implement modes
brick->is_active = true;
// point of no return
//MARS_DBG("99999\n");
add_disk(disk);
input->disk = disk;
//set_device_ro(input->bdev, 0); // TODO: implement modes
mars_power_led_on((void*)brick, true);
} else {
mars_power_led_on((void*)brick, false);
if (input->bdev) {
bdput(input->bdev);
input->bdev = NULL;
}
disk = input->disk;
if (disk) {
q = disk->queue;
del_gendisk(input->disk);
put_disk(input->disk);
input->disk = NULL;
if (q) {
blk_cleanup_queue(q);
}
}
//........
mars_power_led_off((void*)brick, true);
}
return 0;
}
@ -518,7 +538,7 @@ EXPORT_SYMBOL_GPL(if_device_brick_type);
static void __exit exit_if_device(void)
{
int status;
printk(MARS_INFO "exit_if_device()\n");
MARS_INF("exit_if_device()\n");
status = if_device_unregister_brick_type();
unregister_blkdev(DRBD_MAJOR, "mars");
}
@ -529,7 +549,7 @@ static int __init init_if_device(void)
(void)if_device_aspect_types; // not used, shut up gcc
printk(MARS_INFO "init_if_device()\n");
MARS_INF("init_if_device()\n");
status = register_blkdev(DRBD_MAJOR, "mars");
if (status)
return status;

View File

@ -42,7 +42,6 @@ struct if_device_output {
struct if_device_brick {
MARS_BRICK(if_device);
bool is_active;
struct if_device_output hidden_output;
};

1850
mars_light.c Normal file

File diff suppressed because it is too large Load Diff

684
mars_net.c Normal file
View File

@ -0,0 +1,684 @@
// (c) 2011 Thomas Schoebel-Theuer / 1&1 Internet AG
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
//#define IO_DEBUGGING
#ifdef IO_DEBUGGING
#define MARS_IO MARS_DBG
#else
#define MARS_IO(args...) /*empty*/
#endif
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#define _STRATEGY
#include "mars.h"
#include "mars_net.h"
/* Low-level network traffic
*/
/* TODO: allow binding to specific source addresses instead of catch-all.
* TODO: make all the socket options configurable.
* TODO: implement signal handling.
* TODO: add authentication.
* TODO: add compression / encryption.
*/
/* Default socket parameters, applied to every socket by mars_create_socket(). */
struct mars_tcp_params default_tcp_params = {
.window_size = 8 * 1024 * 1024, // for long distance replications
.tcp_timeout = 20, // multiplied by HZ for the socket send / receive timeouts, i.e. seconds
.tcp_keepcnt = 6, // value for TCP_KEEPCNT
.tcp_keepintvl = 10, // keepalive ping time
.tcp_keepidle = 10, // value for TCP_KEEPIDLE
.tos = IPTOS_LOWDELAY,
};
EXPORT_SYMBOL(default_tcp_params);
/* Log an error when setting a socket option failed (negative @status).
 * The failure is only reported, never propagated.
 */
static void _check(int status)
{
	if (status >= 0)
		return;

	MARS_ERR("cannot set socket option, status = %d\n", status);
}
int mars_create_sockaddr(struct sockaddr_storage *addr, const char *spec)
{
struct sockaddr_in *sockaddr = (void*)addr;
int status;
memset(addr, sizeof(*addr), 0);
sockaddr->sin_family = AF_INET;
sockaddr->sin_port = htons(MARS_DEFAULT_PORT);
/* This is PROVISIONARY!
* TODO: add IPV6 syntax and many more features :)
*/
if (!*spec)
return 0;
if (*spec != ':') {
unsigned char u0 = 0, u1 = 0, u2 = 0, u3 = 0;
status = sscanf(spec, "%hhu.%hhu.%hhu.%hhu", &u0, &u1, &u2, &u3);
if (status != 4)
return -EINVAL;
sockaddr->sin_addr.s_addr = (__be32)u0 | (__be32)u1 << 8 | (__be32)u2 << 16 | (__be32)u3 << 24;
}
while (*spec && *spec++ != ':')
/*empty*/;
if (*spec) {
int port = 0;
status = sscanf(spec, "%d", &port);
if (status != 1)
return -EINVAL;
sockaddr->sin_port = htons(port);
}
return 0;
}
EXPORT_SYMBOL_GPL(mars_create_sockaddr);
/* Create a TCP socket in *@sock and configure it from default_tcp_params.
 *
 * Server (@is_server true): the socket is bound to @addr.
 * Client: the socket is bound to an all-zero address and then
 * connected to @addr.
 *
 * Failures to set individual socket options are only logged.
 * Returns a negative status when creating, binding or connecting
 * fails; *@sock is set to NULL only when the creation itself failed.
 *
 * NOTE(review): after a failed bind / connect the socket stays
 * allocated in *@sock; presumably the caller releases it -- confirm.
 */
int mars_create_socket(struct socket **sock, struct sockaddr_storage *addr, bool is_server)
{
	struct sockaddr null_bind = {};
	struct sockaddr *sockaddr = (void*)addr;
	int x_true = 1;
	int status;

	if (!is_server) {
		sockaddr = &null_bind;
	}

	status = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, sock);
	if (status < 0) {
		*sock = NULL;
		MARS_ERR("cannot create socket, status = %d\n", status);
		return status;
	}

	/* TODO: improve this by a table-driven approach
	 */
	(*sock)->sk->sk_rcvtimeo = (*sock)->sk->sk_sndtimeo = default_tcp_params.tcp_timeout * HZ;
	status = kernel_setsockopt(*sock, SOL_SOCKET, SO_SNDBUF, (char*)&default_tcp_params.window_size, sizeof(default_tcp_params.window_size));
	_check(status);
	status = kernel_setsockopt(*sock, SOL_SOCKET, SO_RCVBUF, (char*)&default_tcp_params.window_size, sizeof(default_tcp_params.window_size));
	_check(status);
	/* The type of service is an IP-level option: IP_TOS at level
	 * SOL_IP. (SO_PRIORITY is a SOL_SOCKET option name; used at
	 * level SOL_IP its number selects an unrelated IP option.)
	 */
	status = kernel_setsockopt(*sock, SOL_IP, IP_TOS, (char*)&default_tcp_params.tos, sizeof(default_tcp_params.tos));
	_check(status);
	status = kernel_setsockopt(*sock, IPPROTO_TCP, TCP_NODELAY, (char*)&x_true, sizeof(x_true));
	_check(status);
	status = kernel_setsockopt(*sock, SOL_SOCKET, SO_KEEPALIVE, (char*)&x_true, sizeof(x_true));
	_check(status);
	status = kernel_setsockopt(*sock, IPPROTO_TCP, TCP_KEEPCNT, (char*)&default_tcp_params.tcp_keepcnt, sizeof(default_tcp_params.tcp_keepcnt));
	_check(status);
	status = kernel_setsockopt(*sock, IPPROTO_TCP, TCP_KEEPINTVL, (char*)&default_tcp_params.tcp_keepintvl, sizeof(default_tcp_params.tcp_keepintvl));
	_check(status);
	status = kernel_setsockopt(*sock, IPPROTO_TCP, TCP_KEEPIDLE, (char*)&default_tcp_params.tcp_keepidle, sizeof(default_tcp_params.tcp_keepidle));
	_check(status);

	status = kernel_bind(*sock, sockaddr, sizeof(*sockaddr));
	if (status < 0) {
		MARS_ERR("bind failed, status = %d\n", status);
		return status;
	}

	if (!is_server) {
		// the client connects to the address given by the caller
		sockaddr = (void*)addr;
		status = kernel_connect(*sock, sockaddr, sizeof(*sockaddr), 0);
		if (status < 0) {
			MARS_ERR("connect failed, status = %d\n", status);
		}
	}

	return status;
}
EXPORT_SYMBOL_GPL(mars_create_socket);
/* Send exactly @len bytes from @buf over *@sock.
 *
 * Partial sends are continued until everything is out. -EAGAIN is
 * retried after a short sleep; -EINTR is ignored (pending signals are
 * flushed) and retried as well.
 *
 * *@sock is re-read on every iteration, so a socket which is set to
 * NULL by somebody else in the meantime is detected.
 *
 * Returns @len on success, otherwise a negative status: -EIDRM when
 * the socket has disappeared, -ECOMM when sendmsg reports 0 bytes, or
 * the error from kernel_sendmsg().
 */
int mars_send(struct socket **sock, void *buf, int len)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = len,
	};
	struct msghdr msg = {
		.msg_iov = (struct iovec*)&iov,
		.msg_flags = 0 /* | MSG_NOSIGNAL*/,
	};
	int status = -EIDRM;
	int sent = 0;

	//MARS_IO("buf = %p, len = %d\n", buf, len);

	while (sent < len) {
		mm_segment_t oldfs;

		if (unlikely(!*sock)) {
			MARS_ERR("socket has disappeared\n");
			status = -EIDRM;
			goto done;
		}

		oldfs = get_fs();
		set_fs(get_ds());
		/* Only the remaining bytes are requested: after a partial
		 * send, iov describes just len - sent bytes, and the size
		 * argument has to agree with it.
		 */
		status = kernel_sendmsg(*sock, &msg, &iov, 1, len - sent);
		set_fs(oldfs);

		if (status == -EAGAIN) {
			msleep(50);
			continue;
		}

		if (status == -EINTR) { // ignore it
			flush_signals(current);
			msleep(50);
			continue;
		}

		if (status < 0) {
			MARS_ERR("bad socket sendmsg, len=%d, iov_len=%d, sent=%d, status = %d\n", len, (int)iov.iov_len, sent, status);
			goto done;
		}

		if (!status) {
			MARS_ERR("EOF from socket upon sendmsg\n");
			status = -ECOMM;
			goto done;
		}

		// advance behind the part which has been sent
		iov.iov_base += status;
		iov.iov_len -= status;
		sent += status;
	}
	status = sent;

done:
	return status;
}
EXPORT_SYMBOL_GPL(mars_send);
int mars_recv(struct socket **sock, void *buf, int minlen, int maxlen)
{
int status = -EIDRM;
int done = 0;
if (!buf) {
MARS_ERR("bad receive buffer\n");
return -EINVAL;
}
while (done < minlen) {
mm_segment_t oldfs;
struct kvec iov = {
.iov_base = buf + done,
.iov_len = maxlen - done,
};
struct msghdr msg = {
.msg_iovlen = 1,
.msg_iov = (struct iovec*)&iov,
.msg_flags = 0 | MSG_WAITALL /*| MSG_NOSIGNAL*/,
};
if (unlikely(!*sock)) {
MARS_ERR("socket has disappeared\n");
status = -EIDRM;
goto err;
}
MARS_IO("done %d, fetching %d bytes\n", done, maxlen-done);
oldfs = get_fs();
set_fs(get_ds());
status = kernel_recvmsg(*sock, &msg, &iov, 1, maxlen-done, msg.msg_flags);
set_fs(oldfs);
if (status == -EAGAIN) {
#if 0
if (!done)
goto err;
#endif
msleep(50);
continue;
}
if (!status) { // EOF
MARS_ERR("got EOF (done=%d, req_size=%d)\n", done, maxlen-done);
status = -EPIPE;
goto err;
}
if (status < 0) {
MARS_ERR("bad recvmsg, status = %d\n", status);
goto err;
}
done += status;
}
status = done;
err:
return status;
}
EXPORT_SYMBOL_GPL(mars_recv);
///////////////////////////////////////////////////////////////////////
/* Mid-level field data exchange
*/
/* TODO: make this bytesex-aware
*/
/* Magic number stored in every packet header. The receiver rejects any
 * header whose h_magic differs from this value.
 */
#define MARS_NET_MAGIC 0x63f092ec6048f48cll
/* Header which precedes every field transmitted by _mars_send_struct().
 * It is sent as a raw memory image of this struct.
 */
struct mars_net_header {
u64 h_magic; // always MARS_NET_MAGIC
char h_name[MAX_FIELD_LEN]; // field name; an empty name marks the end of a record
u16 h_seq; // sequence number of the field; the sender stores -1 here as EOF marker
u16 h_len; // length of the field as announced by the sender
};
int _mars_send_struct(struct socket **sock, void *data, const struct meta *meta, int *seq)
{
int count = 0;
int status = 0;
if (!data) { // send EOF
struct mars_net_header header = {
.h_magic = MARS_NET_MAGIC,
.h_seq = -1,
};
return mars_send(sock, &header, sizeof(header));
}
for (; ; meta++) {
struct mars_net_header header = {
.h_magic = MARS_NET_MAGIC,
.h_seq = ++(*seq),
};
void *item = data + meta->field_offset;
int len = meta->field_size;
#if 1
if (len > 16 * PAGE_SIZE) {
MARS_ERR("implausible len=%d, \n", len);
msleep(30000);
status = -EINVAL;
break;
}
#endif
/* Automatically keep the lamport clock correct.
*/
if (meta == mars_cmd_meta) {
struct timespec *stamp = &((struct mars_cmd*)data)->cmd_stamp;
get_lamport(stamp);
} else if (meta == mars_timespec_meta) {
set_lamport(data);
}
status = 0;
switch (meta->field_type) {
case FIELD_STRING:
item = *(void**)item;
len = 0;
if (item)
len = strlen(item);
break;
case FIELD_REF:
if (!meta->field_ref) {
MARS_ERR("improper FIELD_REF definition\n");
status = -EINVAL;
break;
}
item = *(void**)item;
len = meta->field_ref->field_size;
if (!item)
len = 0;
break;
case FIELD_DONE:
len = 0;
case FIELD_SUB:
case FIELD_RAW:
case FIELD_INT:
case FIELD_UINT:
// all ok
break;
default:
MARS_ERR("invalid field type %d\n", meta->field_type);
status = -EINVAL;
break;
}
if (status < 0)
break;
header.h_len = len;
strncpy(header.h_name, meta->field_name, MAX_FIELD_LEN);
MARS_IO("sending header %d '%s' len = %d\n", header.h_seq, meta->field_name, len);
status = mars_send(sock, &header, sizeof(header));
if (status < 0 || !meta->field_name[0]) { // EOR
break;
}
switch (meta->field_type) {
case FIELD_REF:
case FIELD_SUB:
status = _mars_send_struct(sock, item, meta->field_ref, seq);
if (status > 0)
count += status;
break;
default:
if (len > 0) {
MARS_IO("sending extra %d\n", len);
status = mars_send(sock, item, len);
if (status > 0)
count++;
}
}
if (status < 0) {
break;
}
}
if (status >= 0)
status = count;
return status;
}
/* Public entry point for sending one structure: starts a fresh header
 * sequence and hands over to _mars_send_struct().
 */
int mars_send_struct(struct socket **sock, void *data, const struct meta *meta)
{
	int seq_counter = 0;

	return _mars_send_struct(sock, data, meta, &seq_counter);
}
EXPORT_SYMBOL_GPL(mars_send_struct);
int _mars_recv_struct(struct socket **sock, void *data, const struct meta *meta, int *seq)
{
int count = 0;
int status = -EINVAL;
//MARS_IO("\n");
if (!data) {
goto done;
}
for (;;) {
struct mars_net_header header = {};
const struct meta *tmp;
void *item;
void *mem;
status = mars_recv(sock, &header, sizeof(header), sizeof(header));
if (status == -EAGAIN) {
msleep(50);
continue;
}
if (status < 0) {
MARS_ERR("status = %d\n", status);
break;
}
MARS_IO("got header %d '%s' len = %d\n", header.h_seq, header.h_name, header.h_len);
if (header.h_magic != MARS_NET_MAGIC) {
MARS_ERR("bad packet header magic = %llx\n", header.h_magic);
status = -ENOMSG;
break;
}
if (header.h_seq == -1) { // got EOF
status = 0;
break;
};
if (header.h_seq <= *seq) {
MARS_ERR("unexpected packet data, seq=%d (expected=%d)\n", header.h_seq, (*seq) + 1);
status = -ENOMSG;
break;
}
*seq = header.h_seq;
if (!header.h_name[0]) { // end of record (EOR)
status = 0;
break;
}
tmp = find_meta(meta, header.h_name);
if (!tmp) {
MARS_ERR("unknown field '%s'\n", header.h_name);
if (header.h_len > 0) { // try to continue by skipping the rest of data
void *dummy = kmalloc(header.h_len, GFP_MARS);
status = -ENOMEM;
if (!dummy)
break;
status = mars_recv(sock, dummy, header.h_len, header.h_len);
kfree(dummy);
if (status < 0)
break;
}
continue;
}
status = 0;
item = data + tmp->field_offset;
switch (tmp->field_type) {
case FIELD_REF:
case FIELD_STRING:
if (header.h_len <= 0) {
mem = NULL;
} else {
mem = kzalloc(header.h_len + 1, GFP_MARS);
if (!mem) {
status = -ENOMEM;
goto done;
}
}
*(void**)item = mem;
item = mem;
break;
}
switch (tmp->field_type) {
case FIELD_REF:
case FIELD_SUB:
if (!item) {
MARS_ERR("bad item\n");
status = -EINVAL;
break;
}
MARS_IO("starting recursive structure\n");
status = _mars_recv_struct(sock, item, tmp->field_ref, seq);
MARS_IO("ending recursive structure, status = %d\n", status);
if (status > 0)
count += status;
break;
default:
if (header.h_len > 0) {
if (!item) {
MARS_ERR("bad item\n");
status = -EINVAL;
break;
}
MARS_IO("reading extra %d\n", header.h_len);
status = mars_recv(sock, item, header.h_len, header.h_len);
while (status == -EAGAIN) {
msleep(50);
status = mars_recv(sock, item, header.h_len, header.h_len);
}
if (status >= 0) {
//MARS_IO("got data len = %d status = %d\n", header.h_len, status);
count++;
} else {
MARS_ERR("len = %d, status = %d\n", header.h_len, status);
}
}
}
if (status < 0)
break;
}
done:
if (status >= 0) {
status = count;
if (meta == mars_timespec_meta)
set_lamport(data);
} else {
MARS_ERR("status = %d\n", status);
}
return status;
}
/* Public entry point for receiving one structure: starts with sequence
 * number 0 and hands over to _mars_recv_struct().
 */
int mars_recv_struct(struct socket **sock, void *data, const struct meta *meta)
{
	int last_seq = 0;

	return _mars_recv_struct(sock, data, meta, &last_seq);
}
EXPORT_SYMBOL_GPL(mars_recv_struct);
///////////////////////////////////////////////////////////////////////
/* High-level transport of mars structures
*/
/* Field table for struct mars_cmd, used with mars_send_struct() /
 * mars_recv_struct(). cmd_stamp is described as a sub-structure via
 * mars_timespec_meta. The empty entry terminates the table.
 */
const struct meta mars_cmd_meta[] = {
META_INI_SUB(cmd_stamp, struct mars_cmd, mars_timespec_meta),
META_INI(cmd_code, struct mars_cmd, FIELD_INT),
META_INI(cmd_int1, struct mars_cmd, FIELD_INT),
META_INI(cmd_str1, struct mars_cmd, FIELD_STRING),
{}
};
EXPORT_SYMBOL_GPL(mars_cmd_meta);
/* Send every mars_dent linked (via sub_link) into the list at anchor,
 * followed by an EOF marker. Stops at the first send error and returns
 * it; otherwise returns the status of sending the EOF marker.
 */
int mars_send_dent_list(struct socket **sock, struct list_head *anchor)
{
	struct list_head *pos = anchor->next;

	while (pos != anchor) {
		struct mars_dent *entry = container_of(pos, struct mars_dent, sub_link);
		int rc = mars_send_struct(sock, entry, mars_dent_meta);

		if (rc < 0)
			return rc;
		pos = pos->next;
	}

	// a NULL structure is transmitted as EOF
	return mars_send_struct(sock, NULL, mars_dent_meta);
}
EXPORT_SYMBOL_GPL(mars_send_dent_list);
int mars_recv_dent_list(struct socket **sock, struct list_head *anchor)
{
int status;
for (;;) {
struct mars_dent *dent = kzalloc(sizeof(struct mars_dent), GFP_MARS);
if (!dent)
return -ENOMEM;
//MARS_IO("\n");
status = mars_recv_struct(sock, dent, mars_dent_meta);
if (status <= 0) {
kfree(dent);
goto done;
}
list_add_tail(&dent->sub_link, anchor);
}
done:
return status;
}
EXPORT_SYMBOL_GPL(mars_recv_dent_list);
int mars_send_mref(struct socket **sock, struct mref_object *mref)
{
struct mars_cmd cmd = {
.cmd_code = CMD_MREF,
.cmd_int1 = mref->ref_id,
};
int status;
status = mars_send_struct(sock, &cmd, mars_cmd_meta);
if (status < 0)
goto done;
status = mars_send_struct(sock, mref, mars_mref_meta);
if (status < 0)
goto done;
if (mref->ref_rw) {
status = mars_send(sock, mref->ref_data, mref->ref_len);
}
done:
return status;
}
EXPORT_SYMBOL_GPL(mars_send_mref);
/* Receive an mref structure and, when ref_rw is set, its payload of
 * ref_len bytes. A missing ref_data buffer is allocated with kzalloc().
 * Returns the status of the last receive, or -ENOMEM.
 */
int mars_recv_mref(struct socket **sock, struct mref_object *mref)
{
	int rc = mars_recv_struct(sock, mref, mars_mref_meta);

	// nothing more to do on error or when no payload follows
	if (rc < 0 || !mref->ref_rw)
		return rc;

	if (!mref->ref_data) {
		mref->ref_data = kzalloc(mref->ref_len, GFP_MARS);
		if (!mref->ref_data)
			return -ENOMEM;
	}

	rc = mars_recv(sock, mref->ref_data, mref->ref_len, mref->ref_len);
	if (rc < 0)
		MARS_ERR("mref_len = %d, status = %d\n", mref->ref_len, rc);

	return rc;
}
EXPORT_SYMBOL_GPL(mars_recv_mref);
int mars_send_cb(struct socket **sock, struct mref_object *mref)
{
struct mars_cmd cmd = {
.cmd_code = CMD_CB,
.cmd_int1 = mref->ref_id,
};
int status;
status = mars_send_struct(sock, &cmd, mars_cmd_meta);
if (status < 0)
goto done;
status = mars_send_struct(sock, mref, mars_mref_meta);
if (status < 0)
goto done;
if (!mref->ref_rw) {
MARS_IO("sending blocklen = %d\n", mref->ref_len);
status = mars_send(sock, mref->ref_data, mref->ref_len);
}
done:
return status;
}
EXPORT_SYMBOL_GPL(mars_send_cb);
/* Receive the mref structure of a callback and, when ref_rw is NOT set,
 * ref_len bytes of data into the already existing ref_data buffer.
 * Returns -EINVAL when that buffer is missing, otherwise the status of
 * the last receive.
 */
int mars_recv_cb(struct socket **sock, struct mref_object *mref)
{
	int rc = mars_recv_struct(sock, mref, mars_mref_meta);

	if (rc < 0 || mref->ref_rw)
		return rc;

	if (!mref->ref_data) {
		MARS_ERR("no internal buffer available\n");
		return -EINVAL;
	}

	MARS_IO("receiving blocklen = %d\n", mref->ref_len);
	return mars_recv(sock, mref->ref_data, mref->ref_len, mref->ref_len);
}
EXPORT_SYMBOL_GPL(mars_recv_cb);
////////////////// module init stuff /////////////////////////
/* Module init hook: only logs a message, there is nothing to set up. */
static int __init _init_net(void)
{
MARS_INF("init_net()\n");
return 0;
}
/* Module exit hook: only logs a message, there is nothing to tear down. */
static void __exit _exit_net(void)
{
MARS_INF("exit_net()\n");
}
MODULE_DESCRIPTION("MARS network infrastructure");
MODULE_AUTHOR("Thomas Schoebel-Theuer <tst@1und1.de>");
MODULE_LICENSE("GPL");
module_init(_init_net);
module_exit(_exit_net);

70
mars_net.h Normal file
View File

@ -0,0 +1,70 @@
// (c) 2011 Thomas Schoebel-Theuer / 1&1 Internet AG
/* Interface of the MARS network layer: low-level socket helpers,
 * generic (meta-table driven) structure transport, and transport of
 * mars-specific objects.
 */
#ifndef MARS_NET_H
#define MARS_NET_H
#include <net/sock.h>
#include <net/ipconfig.h>
#include <net/tcp.h>
#include "brick.h"
#define MARS_DEFAULT_PORT 7777
/* Tunable TCP parameters. How and where they are applied to a socket is
 * not visible in this header.
 */
struct mars_tcp_params {
int tcp_timeout;
int window_size;
int tcp_keepcnt;
int tcp_keepintvl;
int tcp_keepidle;
char tos;
};
extern struct mars_tcp_params default_tcp_params;
/* Command codes transported in mars_cmd.cmd_code. */
enum {
CMD_NOP,
CMD_STATUS,
CMD_GETINFO,
CMD_GETENTS,
CMD_CONNECT,
CMD_MREF, // sent by mars_send_mref()
CMD_CB, // sent by mars_send_cb()
};
/* Command record exchanged via mars_send_struct() / mars_recv_struct()
 * together with mars_cmd_meta.
 */
struct mars_cmd {
struct timespec cmd_stamp; // for automatic lamport clock
int cmd_code; // one of the CMD_* values
int cmd_int1; // meaning depends on cmd_code (e.g. ref_id for CMD_MREF / CMD_CB)
//int cmd_int2;
//int cmd_int3;
char *cmd_str1; // optional string argument, transported as FIELD_STRING
//char *cmd_str2;
//char *cmd_str3;
};
extern const struct meta mars_cmd_meta[];
/* Low-level network traffic
*/
extern int mars_create_sockaddr(struct sockaddr_storage *addr, const char *spec);
extern int mars_create_socket(struct socket **sock, struct sockaddr_storage *addr, bool is_server);
extern int mars_send(struct socket **sock, void *buf, int len);
extern int mars_recv(struct socket **sock, void *buf, int minlen, int maxlen);
/* Mid-level generic field data exchange
*/
extern int mars_send_struct(struct socket **sock, void *data, const struct meta *meta);
extern int mars_recv_struct(struct socket **sock, void *data, const struct meta *meta);
/* High-level transport of mars structures
*/
extern int mars_send_dent_list(struct socket **sock, struct list_head *anchor);
extern int mars_recv_dent_list(struct socket **sock, struct list_head *anchor);
extern int mars_send_mref(struct socket **sock, struct mref_object *mref);
extern int mars_recv_mref(struct socket **sock, struct mref_object *mref);
extern int mars_send_cb(struct socket **sock, struct mref_object *mref);
extern int mars_recv_cb(struct socket **sock, struct mref_object *mref);
#endif

516
mars_server.c Normal file
View File

@ -0,0 +1,516 @@
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
// Server brick (just for demonstration)
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
//#define IO_DEBUGGING
#ifdef IO_DEBUGGING
#define MARS_IO MARS_DBG
#else
#define MARS_IO(args...) /*empty*/
#endif
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kthread.h>
#define _STRATEGY
#include "mars.h"
///////////////////////// own type definitions ////////////////////////
#include "mars_server.h"
static struct socket *server_socket = NULL;
static struct task_struct *server_thread = NULL;
///////////////////////// own helper functions ////////////////////////
/* Checker callback handed to mars_dent_work() for CMD_GETENTS.
 * Currently a stub: ignores all arguments and always returns 0.
 */
static int server_checker(const char *path, const char *name, int namlen, unsigned int d_type, int *prefix, int *serial)
{
return 0;
}
/* Worker callback handed to mars_dent_work() for CMD_GETENTS.
 * Currently a stub: does nothing and always returns 0.
 */
static int server_worker(struct mars_global *global, struct mars_dent *dent, bool direction)
{
return 0;
}
/* Completion callback for mrefs submitted by handler_thread().
 * Sends the result back to the client as a CMD_CB message and
 * decrements the brick's in_flight counter.
 */
static void server_endio(struct generic_callback *cb)
{
struct server_mref_aspect *mref_a;
struct mref_object *mref;
struct server_brick *brick;
struct socket **sock;
int status;
// cb_private was set to the aspect by handler_thread()
mref_a = cb->cb_private;
CHECK_PTR(mref_a, err);
mref = mref_a->object;
CHECK_PTR(mref, err);
brick = mref_a->brick;
CHECK_PTR(brick, err);
sock = mref_a->sock;
CHECK_PTR(sock, err);
// serialize against other senders on the same socket
down(&brick->socket_sem);
status = mars_send_cb(sock, mref);
up(&brick->socket_sem);
if (status < 0) {
MARS_ERR("cannot send response, status = %d\n", status);
kernel_sock_shutdown(*sock, SHUT_WR);
}
atomic_dec(&brick->in_flight);
return;
err:
// NOTE(review): in_flight is not decremented on this path
MARS_FAT("cannot handle callback - giving up\n");
}
/* Per-connection service thread.
 *
 * data is the server_brick created by _server_thread() for this
 * connection; brick->handler_socket is the accepted socket.
 *
 * The thread first signals its creator (by clearing brick->handler_thread
 * and waking startup_event), then reads mars_cmd records from the socket
 * and dispatches on cmd_code until an error occurs or the thread is asked
 * to stop. Afterwards the socket is released, the brick is switched off,
 * disconnected and finally torn down via generic_brick_exit_full().
 *
 * All replies are sent while holding brick->socket_sem, because
 * server_endio() may send callbacks on the same socket concurrently.
 *
 * Always returns 0.
 */
static int handler_thread(void *data)
{
	struct server_brick *brick = data;
	struct socket **sock = &brick->handler_socket;
	int max_round = 300;
	int timeout;
	int status = 0;

	// startup handshake: _server_thread() waits for handler_thread == NULL
	brick->handler_thread = NULL;
	wake_up_interruptible(&brick->startup_event);
	MARS_DBG("--------------- handler_thread starting on socket %p\n", *sock);
	if (!*sock)
		goto done;

	//fake_mm();

	while (!kthread_should_stop()) {
		struct mars_cmd cmd = {};

		status = mars_recv_struct(sock, &cmd, mars_cmd_meta);
		if (status < 0) {
			MARS_ERR("command status = %d\n", status);
			// a partially received command may already own a string
			kfree(cmd.cmd_str1);
			break;
		}

		MARS_IO("cmd = %d\n", cmd.cmd_code);

		status = -EPROTO;
		switch (cmd.cmd_code) {
		case CMD_NOP:
			MARS_DBG("got NOP operation\n");
			status = 0;
			break;
		case CMD_STATUS:
			//...
			MARS_ERR("NYI\n");
			break;
		case CMD_GETINFO:
		{
			struct mars_info info = {};
			status = GENERIC_INPUT_CALL(brick->inputs[0], mars_get_info, &info);
			if (status < 0) {
				break;
			}
			// both parts of the reply must not be interleaved with callbacks
			down(&brick->socket_sem);
			status = mars_send_struct(sock, &cmd, mars_cmd_meta);
			if (status >= 0)
				status = mars_send_struct(sock, &info, mars_info_meta);
			up(&brick->socket_sem);
			break;
		}
		case CMD_GETENTS:
		{
			struct mars_global glob_tmp = {
				.dent_anchor = LIST_HEAD_INIT(glob_tmp.dent_anchor),
				.brick_anchor = LIST_HEAD_INIT(glob_tmp.brick_anchor),
				.mutex = __SEMAPHORE_INITIALIZER(glob_tmp.mutex, 1),
			};

			status = -EINVAL;
			if (!cmd.cmd_str1)
				break;

			status = mars_dent_work(&glob_tmp, cmd.cmd_str1, sizeof(struct mars_dent), server_checker, server_worker, NULL, cmd.cmd_int1);
			MARS_DBG("dents status = %d\n", status);
			if (status >= 0) {
				down(&brick->socket_sem);
				status = mars_send_dent_list(sock, &glob_tmp.dent_anchor);
				up(&brick->socket_sem);

				if (status < 0) {
					MARS_ERR("could not send dentry information, status = %d\n", status);
				}
			}

			// release the temporary dents in every case, also after errors
			mars_dent_free_all(&glob_tmp.dent_anchor);
			break;
		}
		case CMD_CONNECT:
		{
			struct mars_brick *prev;

			//TODO: fix possible races
			prev = mars_find_brick(mars_global, NULL, cmd.cmd_str1);
			if (likely(prev)) {
				status = generic_connect((void*)brick->inputs[0], (void*)prev->outputs[0]);
			} else {
				MARS_ERR("cannot find brick '%s'\n", cmd.cmd_str1 ? cmd.cmd_str1 : "NULL");
				status = -EINVAL;
			}

			// the connect result travels back to the client in cmd_int1
			cmd.cmd_int1 = status;
			down(&brick->socket_sem);
			status = mars_send_struct(sock, &cmd, mars_cmd_meta);
			up(&brick->socket_sem);
			break;
		}
		case CMD_MREF:
		{
			struct mref_object *mref;
			struct server_mref_aspect *mref_a;

			mref = server_alloc_mref(&brick->hidden_output, &brick->mref_object_layout);
			status = -ENOMEM;
			if (!mref)
				break;

			mref_a = server_mref_get_aspect(&brick->hidden_output, mref);
			if (unlikely(!mref_a)) {
				// NOTE(review): other paths use mars_free_mref() - confirm kfree() is right here
				kfree(mref);
				break;
			}

			status = mars_recv_mref(sock, mref);
			if (status < 0) {
				// nobody else knows this mref yet: do not leak it
				mars_free_mref(mref);
				break;
			}

			mref_a->brick = brick;
			mref_a->sock = sock;
			mref->_ref_cb.cb_private = mref_a;
			mref->_ref_cb.cb_fn = server_endio;
			mref->ref_cb = &mref->_ref_cb;
			atomic_inc(&brick->in_flight);

			status = GENERIC_INPUT_CALL(brick->inputs[0], mref_get, mref);
			if (status < 0) {
				MARS_INF("execution error = %d\n", status);
				mref->_ref_cb.cb_error = status;
				// report the failure to the client; this also drops in_flight
				server_endio(&mref->_ref_cb);
				mars_free_mref(mref);
				status = 0; // continue serving requests
				break;
			}

			GENERIC_INPUT_CALL(brick->inputs[0], mref_io, mref);
			GENERIC_INPUT_CALL(brick->inputs[0], mref_put, mref);
			break;
		}
		case CMD_CB:
			MARS_ERR("oops, as a server I should never get CMD_CB; something is wrong here - attack attempt??\n");
			break;
		default:
			MARS_ERR("unknown command %d\n", cmd.cmd_code);
		}

		/* The string argument was allocated by mars_recv_struct() for
		 * this command only; kfree(NULL) is a no-op.
		 */
		kfree(cmd.cmd_str1);

		if (status < 0)
			break;
	}

	//kernel_sock_shutdown(*sock, SHUT_WR);
	sock_release(*sock);
	//cleanup_mm();

done:
	MARS_DBG("handler_thread terminating, status = %d\n", status);

	// switch the brick off, retrying once per second until its LED is off
	mars_power_button((void*)brick, false);
	do {
		int sw_status;

		if (!brick->ops || !brick->ops->brick_switch) {
			MARS_FAT("cannot switch off - this will do no real harm, but leave a memory leak\n");
			break;
		}
		sw_status = brick->ops->brick_switch(brick);
		if (sw_status < 0) {
			MARS_ERR("server shutdown failed, status = %d\n", sw_status);
		} else if (max_round-- < 0)
			break;
		msleep(1000);
	} while (!brick->power.led_off);

	if (brick->inputs[0] && brick->inputs[0]->connect) {
		MARS_DBG("disconnecting input %p\n", brick->inputs[0]->connect);
		(void)generic_disconnect((void*)brick->inputs[0]);
	}

	// wait (with growing intervals) until no request is in flight any more
	timeout = 60 * 1000;
	while (atomic_read(&brick->in_flight) || !brick->power.led_off) {
		MARS_ERR("server brick has resources allocated - cannot terminate thread\n");
		msleep(timeout);
		if (timeout < 3600 * 1000)
			timeout += 30 * 1000;
	}

	(void)generic_brick_exit_full((void*)brick);
	MARS_DBG("done\n");
	return 0;
}
////////////////// own brick / input / output operations //////////////////
/* Forward a mars_get_info request to whatever is connected to the
 * brick's first input.
 */
static int server_get_info(struct server_output *output, struct mars_info *info)
{
	struct server_input *upstream = output->brick->inputs[0];

	return GENERIC_INPUT_CALL(upstream, mars_get_info, info);
}
/* Forward mref_get to the brick's first input. */
static int server_ref_get(struct server_output *output, struct mref_object *mref)
{
	struct server_input *upstream = output->brick->inputs[0];

	return GENERIC_INPUT_CALL(upstream, mref_get, mref);
}
/* Forward mref_put to the brick's first input. */
static void server_ref_put(struct server_output *output, struct mref_object *mref)
{
	struct server_input *upstream = output->brick->inputs[0];

	GENERIC_INPUT_CALL(upstream, mref_put, mref);
}
/* Forward mref_io to the brick's first input. */
static void server_ref_io(struct server_output *output, struct mref_object *mref)
{
	struct server_input *upstream = output->brick->inputs[0];

	GENERIC_INPUT_CALL(upstream, mref_io, mref);
}
/* brick_switch operation: bring the power LEDs in line with the current
 * state of the power button. Always returns 0.
 */
static int server_switch(struct server_brick *brick)
{
	if (!brick->power.button) {
		// button released: indicate "off"
		mars_power_led_on((void*)brick, false);
		mars_power_led_off((void*)brick, true);
		return 0;
	}

	// button pressed: indicate "on"
	mars_power_led_off((void*)brick, false);
	MARS_INF("starting.....");
	mars_power_led_on((void*)brick, true);
	return 0;
}
//////////////// object / aspect constructors / destructors ///////////////
/* Aspect constructor: the server aspect needs no initialisation. */
static int server_mref_aspect_init_fn(struct generic_aspect *_ini, void *_init_data)
{
	(void)_ini;
	return 0;
}
/* Aspect destructor: the server aspect holds nothing to release. */
static void server_mref_aspect_exit_fn(struct generic_aspect *_ini, void *_init_data)
{
	(void)_ini;
}
MARS_MAKE_STATICS(server);
////////////////////// brick constructors / destructors ////////////////////
/* Brick constructor: sets up the hidden ("internal") output, the startup
 * wait queue and the semaphore which serialises socket writes.
 */
static int server_brick_construct(struct server_brick *brick)
{
	_server_output_init(brick, &brick->hidden_output, "internal");

	init_waitqueue_head(&brick->startup_event);
	sema_init(&brick->socket_sem, 1);

	return 0;
}
/* Output constructor: nothing to set up, always returns 0. */
static int server_output_construct(struct server_output *output)
{
return 0;
}
///////////////////////// static structs ////////////////////////
/* Brick-level operations of the server brick. */
static struct server_brick_ops server_brick_ops = {
.brick_switch = server_switch,
};
/* Output operations; the mref operations simply forward to the brick's
 * first input.
 */
static struct server_output_ops server_output_ops = {
.make_object_layout = server_make_object_layout,
.mars_get_info = server_get_info,
.mref_get = server_ref_get,
.mref_put = server_ref_put,
.mref_io = server_ref_io,
};
const struct server_input_type server_input_type = {
.type_name = "server_input",
.input_size = sizeof(struct server_input),
};
static const struct server_input_type *server_input_types[] = {
&server_input_type,
};
const struct server_output_type server_output_type = {
.type_name = "server_output",
.output_size = sizeof(struct server_output),
.master_ops = &server_output_ops,
.output_construct = &server_output_construct,
.aspect_types = server_aspect_types,
.layout_code = {
[BRICK_OBJ_MREF] = LAYOUT_ALL,
}
};
static const struct server_output_type *server_output_types[] = {
&server_output_type,
};
/* Type descriptor of the server brick: one input and no regular outputs.
 * The only output in use is the hidden one set up in
 * server_brick_construct().
 */
const struct server_brick_type server_brick_type = {
.type_name = "server_brick",
.brick_size = sizeof(struct server_brick),
.max_inputs = 1,
.max_outputs = 0,
.master_ops = &server_brick_ops,
.default_input_types = server_input_types,
.default_output_types = server_output_types,
.brick_construct = &server_brick_construct,
};
EXPORT_SYMBOL_GPL(server_brick_type);
///////////////////////////////////////////////////////////////////////
// strategy layer
/* Main server thread: accepts connections on server_socket and starts
 * one handler_thread (with its own server_brick) per connection.
 *
 * kernel_accept() is used in non-blocking mode and polled every 500 ms,
 * so that kthread_should_stop() is noticed. Clears server_thread on exit
 * and always returns 0.
 */
static int _server_thread(void *data)
{
	char *id = my_id();
	int version = 0;
	int status = 0;

	//fake_mm();

	MARS_INF("-------- server starting on host '%s' ----------\n", id);

	while (!kthread_should_stop()) {
		int size;
		struct server_brick *brick;
		struct task_struct *thread;
		struct socket *new_socket = NULL;
		int rc;

		rc = kernel_accept(server_socket, &new_socket, O_NONBLOCK);
		if (rc < 0) {
			msleep(500);
			if (rc == -EAGAIN)
				continue; // without error message
			MARS_ERR("accept status = %d\n", rc);
			continue;
		}
		if (!new_socket) {
			MARS_ERR("got no socket\n");
			msleep(3000);
			continue;
		}
		MARS_DBG("got new connection %p\n", new_socket);

		/* TODO: check authorization.
		 */

		// brick + pointer arrays for inputs/outputs + the single input
		size = server_brick_type.brick_size +
			(server_brick_type.max_inputs + server_brick_type.max_outputs) * sizeof(void*) +
			sizeof(struct server_input);

		brick = kzalloc(size, GFP_MARS);
		if (!brick) {
			MARS_ERR("cannot allocate server instance\n");
			goto err;
		}

		rc = generic_brick_init_full(brick, size, (void*)&server_brick_type, NULL, NULL, NULL);
		if (rc) {
			MARS_ERR("cannot init server brick, status = %d\n", rc);
			// the brick never came to life: give its memory back
			kfree(brick);
			goto err;
		}

		thread = kthread_create(handler_thread, brick, "mars_handler%d", version++);
		if (IS_ERR(thread)) {
			MARS_ERR("cannot create thread, status = %ld\n", PTR_ERR(thread));
			// NOTE(review): the initialised brick is not torn down here - confirm the proper cleanup sequence
			goto err;
		}
		brick->handler_thread = thread;
		brick->handler_socket = new_socket;
		wake_up_process(thread);
		// the handler clears handler_thread as soon as it has started
		wait_event_interruptible(brick->startup_event, brick->handler_thread == NULL);
		continue;

	err:
		if (new_socket) {
			kernel_sock_shutdown(new_socket, SHUT_WR);
			sock_release(new_socket);
		}
	}

	MARS_INF("-------- cleaning up ----------\n");

	//cleanup_mm();

	MARS_INF("-------- done status = %d ----------\n", status);
	server_thread = NULL;
	return status;
}
////////////////// module init stuff /////////////////////////
/* Module init: create and listen on the server socket, start the accept
 * thread and register the server brick type.
 *
 * On failure after the socket has been set up, everything created so far
 * is undone again, so that a failed module load leaves neither a
 * listening socket nor a running thread behind.
 */
static int __init init_server(void)
{
	struct sockaddr_storage sockaddr = {};
	struct task_struct *thread;
	int status;

	MARS_INF("init_server()\n");

	status = mars_create_sockaddr(&sockaddr, "");
	if (status < 0)
		return status;

	status = mars_create_socket(&server_socket, &sockaddr, true);
	if (status < 0) {
		// NOTE(review): unclear whether mars_create_socket() releases the socket on failure
		return status;
	}

	status = kernel_listen(server_socket, 100);
	if (status < 0)
		goto err_sock;

	thread = kthread_create(_server_thread, NULL, "mars_server");
	if (IS_ERR(thread)) {
		status = PTR_ERR(thread);
		goto err_sock;
	}
	server_thread = thread;
	wake_up_process(thread);

	status = server_register_brick_type();
	if (status < 0)
		goto err_thread;
	return status;

err_thread:
	// same shutdown sequence as in exit_server()
	kernel_sock_shutdown(server_socket, SHUT_WR);
	kthread_stop(thread);
	server_thread = NULL;
err_sock:
	sock_release(server_socket);
	server_socket = NULL;
	return status;
}
/* Module exit: unregister the brick type, stop the accept thread and
 * release the listening socket once the thread has cleared server_thread.
 */
static void __exit exit_server(void)
{
	MARS_INF("exit_server()\n");
	server_unregister_brick_type();

	if (!server_thread)
		return;

	if (server_socket)
		kernel_sock_shutdown(server_socket, SHUT_WR);

	kthread_stop(server_thread);

	// _server_thread() resets server_thread to NULL when it terminates
	if (!server_thread && server_socket) {
		sock_release(server_socket);
		server_socket = NULL;
	}
}
MODULE_DESCRIPTION("MARS server brick");
MODULE_AUTHOR("Thomas Schoebel-Theuer <tst@1und1.de>");
MODULE_LICENSE("GPL");
module_init(init_server);
module_exit(exit_server);

40
mars_server.h Normal file
View File

@ -0,0 +1,40 @@
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
/* Type declarations of the server brick. */
#ifndef MARS_SERVER_H
#define MARS_SERVER_H
#include <linux/wait.h>
#include "mars_net.h"
//extern struct socket *server_socket;
//extern struct task_struct *server_thread;
//extern wait_queue_head_t server_event;
/* Per-mref state of the server brick; read by the completion callback
 * to find the way back to the client.
 */
struct server_mref_aspect {
GENERIC_ASPECT(mref);
struct server_brick *brick; // brick which submitted the mref
struct socket **sock; // socket on which the callback is sent
};
struct server_output {
MARS_OUTPUT(server);
};
/* One instance exists per accepted connection. */
struct server_brick {
MARS_BRICK(server);
atomic_t in_flight; // number of mrefs submitted but not yet completed
struct socket *handler_socket; // the accepted connection
struct semaphore socket_sem; // serialises senders on handler_socket
struct task_struct *handler_thread; // set by the creator, cleared by the handler once it runs
wait_queue_head_t startup_event; // woken by the handler after it has started
struct generic_object_layout mref_object_layout;
struct server_output hidden_output; // internal output used for allocating mrefs
};
struct server_input {
MARS_INPUT(server);
};
MARS_TYPES(server);
#endif

View File

@ -77,7 +77,7 @@ static struct device_sio_brick *_device_brick = NULL;
void make_test_instance(void)
{
static char *names[] = { "brick" };
static const char *names[] = { "brick" };
struct generic_output *first = NULL;
struct generic_output *inter = NULL;
struct generic_input *last = NULL;
@ -154,7 +154,8 @@ void make_test_instance(void)
#ifdef CONF_FDSYNC
_device_brick->outputs[0]->o_fdsync = true;
#endif
device_brick->ops->brick_switch(device_brick, true);
mars_power_button((void*)device_brick, true);
device_brick->ops->brick_switch(device_brick);
first = device_brick->outputs[0];
// last
@ -269,7 +270,8 @@ void make_test_instance(void)
MARS_INF("------------- START GATE --------------\n");
_if_brick->ops->brick_switch(_if_brick, true);
mars_power_button((void*)if_brick, true);
_if_brick->ops->brick_switch(_if_brick);
//_if_brick->is_active = true;
msleep(2000);

View File

@ -1,6 +1,6 @@
// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
// Trans_Logger brick (just for demonstration)
// Trans_Logger brick
//#define BRICK_DEBUGGING
//#define MARS_DEBUGGING
@ -17,10 +17,9 @@
#include "mars_trans_logger.h"
//#define inline /**/
#if 0
#define inline __attribute__((__noinline__))
#define _noinline /**/
//#define _noinline __attribute__((__noinline__))
#endif
////////////////////////////////////////////////////////////////////
@ -35,7 +34,7 @@ static inline bool q_cmp(struct pairing_heap_mref *_a, struct pairing_heap_mref
_PAIRING_HEAP_FUNCTIONS(static,mref,q_cmp);
static inline void q_init(struct logger_queue *q) _noinline
static inline void q_init(struct logger_queue *q)
{
INIT_LIST_HEAD(&q->q_anchor);
q->heap_low = NULL;
@ -69,7 +68,7 @@ always_done:
return res;
}
static inline void q_insert(struct logger_queue *q, struct trans_logger_mref_aspect *mref_a) _noinline
static inline void q_insert(struct logger_queue *q, struct trans_logger_mref_aspect *mref_a)
{
unsigned long flags;
@ -89,7 +88,7 @@ static inline void q_insert(struct logger_queue *q, struct trans_logger_mref_asp
traced_unlock(&q->q_lock, flags);
}
static inline void q_pushback(struct logger_queue *q, struct trans_logger_mref_aspect *mref_a) _noinline
static inline void q_pushback(struct logger_queue *q, struct trans_logger_mref_aspect *mref_a)
{
unsigned long flags;
@ -107,7 +106,7 @@ static inline void q_pushback(struct logger_queue *q, struct trans_logger_mref_a
traced_unlock(&q->q_lock, flags);
}
static inline struct trans_logger_mref_aspect *q_fetch(struct logger_queue *q) _noinline
static inline struct trans_logger_mref_aspect *q_fetch(struct logger_queue *q)
{
struct trans_logger_mref_aspect *mref_a = NULL;
unsigned long flags;
@ -158,7 +157,7 @@ static inline struct trans_logger_mref_aspect *q_fetch(struct logger_queue *q) _
///////////////////////// own helper functions ////////////////////////
static inline int hash_fn(loff_t base_index) _noinline
static inline int hash_fn(loff_t base_index)
{
// simple and stupid
loff_t tmp;
@ -222,7 +221,7 @@ static struct trans_logger_mref_aspect *hash_find(struct hash_anchor *table, lof
return res;
}
static inline void hash_insert(struct hash_anchor *table, struct trans_logger_mref_aspect *elem_a, atomic_t *cnt) _noinline
static inline void hash_insert(struct hash_anchor *table, struct trans_logger_mref_aspect *elem_a, atomic_t *cnt)
{
loff_t base_index = elem_a->object->ref_pos >> REGION_SIZE_BITS;
int hash = hash_fn(base_index);
@ -242,7 +241,7 @@ static inline void hash_insert(struct hash_anchor *table, struct trans_logger_mr
traced_writeunlock(&start->hash_lock, flags);
}
static inline bool hash_put(struct hash_anchor *table, struct trans_logger_mref_aspect *elem_a, atomic_t *cnt) _noinline
static inline bool hash_put(struct hash_anchor *table, struct trans_logger_mref_aspect *elem_a, atomic_t *cnt)
{
struct mref_object *elem = elem_a->object;
loff_t base_index = elem->ref_pos >> REGION_SIZE_BITS;
@ -327,7 +326,7 @@ static int _write_ref_get(struct trans_logger_output *output, struct trans_logge
}
mref_a->output = output;
mref_a->stamp = CURRENT_TIME;
get_lamport(&mref_a->stamp);
mref->ref_flags = MREF_UPTODATE;
mref_a->shadow_ref = mref_a; // cyclic self-reference
atomic_set(&mref->ref_count, 1);
@ -354,6 +353,13 @@ static int trans_logger_ref_get(struct trans_logger_output *output, struct mref_
if (mref->ref_may_write == READ) {
return _read_ref_get(output, mref_a);
}
/* FIXME: THIS IS PROVISIONARY (use event instead)
*/
while (unlikely(!output->brick->power.led_on)) {
msleep(2 * HZ);
}
return _write_ref_get(output, mref_a);
err:
@ -463,7 +469,7 @@ static void trans_logger_ref_io(struct trans_logger_output *output, struct mref_
MARS_DBG("hashing %d at %lld\n", mref->ref_len, mref->ref_pos);
hash_insert(output->hash_table, mref_a, &output->hash_count);
q_insert(&output->q_phase1, mref_a);
wake_up(&output->event);
wake_up_interruptible(&output->event);
}
return;
}
@ -526,7 +532,7 @@ static void phase1_endio(struct generic_callback *cb)
// queue up for the next phase
q_insert(&output->q_phase2, orig_mref_a);
wake_up(&output->event);
wake_up_interruptible(&output->event);
err: ;
}
@ -567,6 +573,13 @@ static bool phase1_startio(struct trans_logger_mref_aspect *orig_mref_a)
goto err;
}
atomic_inc(&output->q_phase1.q_flying);
/* NYI Provisionary! this is wrong!
* All requests must be sorted according to pos,
* only the smallest _uncommitted_ write-back
* should be counting!
*/
brick->current_pos = brick->logst.log_pos;
return true;
err:
@ -605,7 +618,7 @@ static void phase2_endio(struct generic_callback *cb)
} else {
q_insert(&output->q_phase4, sub_mref_a);
}
wake_up(&output->event);
wake_up_interruptible(&output->event);
err: ;
}
@ -715,7 +728,7 @@ static void phase3_endio(struct generic_callback *cb)
// queue up for the next phase
q_insert(&output->q_phase4, sub_mref_a);
wake_up(&output->event);
wake_up_interruptible(&output->event);
err: ;
}
@ -795,7 +808,7 @@ put:
//MARS_INF("put ORIGREF.\n");
CHECK_ATOMIC(&orig_mref->ref_count, 1);
trans_logger_ref_put(orig_mref_a->output, orig_mref);
wake_up(&output->event);
wake_up_interruptible(&output->event);
err: ;
}
@ -845,8 +858,6 @@ err:
/*********************************************************************
* The logger thread.
* There is only a single instance, dealing with all requests in parallel.
* So there is less need for locking (concept stolen from microkernel
* architectures).
*/
static int run_queue(struct logger_queue *q, bool (*startio)(struct trans_logger_mref_aspect *sub_mref_a), int max)
@ -868,17 +879,26 @@ static int run_queue(struct logger_queue *q, bool (*startio)(struct trans_logger
return 0;
}
static int trans_logger_thread(void *data)
static inline int _congested(struct trans_logger_output *output)
{
struct trans_logger_output *output = data;
struct trans_logger_brick *brick;
return atomic_read(&output->q_phase1.q_queued)
|| atomic_read(&output->q_phase1.q_flying)
|| atomic_read(&output->q_phase2.q_queued)
|| atomic_read(&output->q_phase2.q_flying)
|| atomic_read(&output->q_phase3.q_queued)
|| atomic_read(&output->q_phase3.q_flying)
|| atomic_read(&output->q_phase4.q_queued)
|| atomic_read(&output->q_phase4.q_flying);
}
static
void trans_logger_log(struct trans_logger_output *output)
{
struct trans_logger_brick *brick = output->brick;
int wait_jiffies = HZ;
int last_jiffies = jiffies;
brick = output->brick;
MARS_INF("logger has started.\n");
while (!kthread_should_stop()) {
while (!kthread_should_stop() || _congested(output)) {
int status;
wait_event_interruptible_timeout(
@ -886,7 +906,8 @@ static int trans_logger_thread(void *data)
q_is_ready(&output->q_phase1) ||
q_is_ready(&output->q_phase2) ||
q_is_ready(&output->q_phase3) ||
q_is_ready(&output->q_phase4),
q_is_ready(&output->q_phase4) ||
(kthread_should_stop() && !_congested(output)),
wait_jiffies);
#if 1
if (((int)jiffies) - last_jiffies >= HZ * 10 && atomic_read(&output->hash_count) > 0) {
@ -923,6 +944,73 @@ static int trans_logger_thread(void *data)
(void)run_queue(&output->q_phase4, phase4_startio, 64);
}
}
}
/* Stand-in for log replay, which is not yet implemented ("NYI"):
 * sleeps 15 seconds, then declares the replay finished by moving
 * current_pos to end_pos and calling mars_trigger().
 * Afterwards it idles in one-second sleeps until the kthread is
 * asked to stop.
 */
static
void trans_logger_replay(struct trans_logger_output *output)
{
	struct trans_logger_brick *logger = output->brick;

	MARS_INF("NYI simulating replay at %lld....\n", logger->current_pos);
	msleep(15 * 1000);
	MARS_INF("NYI simulated replay finished at %lld....\n", logger->end_pos);

	logger->current_pos = logger->end_pos;
	mars_trigger();

	/* nothing more to do: park until kthread_stop() is called */
	for (;;) {
		if (kthread_should_stop())
			break;
		msleep(1000);
	}
}
/* Thread function of the logger kthread.
 * @data: the struct trans_logger_output this thread works for.
 *
 * Resets current_pos to start_pos, switches the power LED on, then
 * runs either the replay or the logging loop depending on
 * brick->do_replay. When that loop returns, the LED is switched off.
 * Always returns 0.
 */
static
int trans_logger_thread(void *data)
{
	struct trans_logger_output *out = data;
	struct trans_logger_brick *logger = out->brick;

	MARS_INF("........... logger has started.\n");

	logger->current_pos = logger->start_pos;
	mars_power_led_on((void*)logger, true);

	if (!logger->do_replay)
		trans_logger_log(out);
	else
		trans_logger_replay(out);

	MARS_INF("........... logger has stopped.\n");

	mars_power_led_off((void*)logger, true);
	return 0;
}
/* brick_switch operation: start or stop the logger thread of outputs[0]
 * according to brick->power.button.
 *
 * Button off: stop the thread (if any) and drop the task reference.
 * Button on:  create and start the thread unless one already exists.
 *
 * Returns 0, or the kthread_create() error code when the thread
 * cannot be created.
 */
static
int trans_logger_switch(struct trans_logger_brick *brick)
{
	static int index = 0;
	struct trans_logger_output *output = brick->outputs[0];
	struct task_struct *task;

	if (!brick->power.button) {
		mars_power_led_on((void*)brick, false);
		task = output->thread;
		if (task) {
			kthread_stop(task);
			put_task_struct(task);
			output->thread = NULL;
		}
		return 0;
	}

	mars_power_led_off((void*)brick, false);
	if (output->thread)
		return 0; /* already running */

	output->thread = kthread_create(trans_logger_thread, output, "mars_logger%d", index++);
	if (IS_ERR(output->thread)) {
		int error = PTR_ERR(output->thread);

		MARS_ERR("cannot create thread, status=%d\n", error);
		output->thread = NULL;
		return error;
	}
	/* hold a reference so the later kthread_stop() has a valid task */
	get_task_struct(output->thread);
	wake_up_process(output->thread);
	return 0;
}
@ -956,7 +1044,6 @@ static int trans_logger_brick_construct(struct trans_logger_brick *brick)
static int trans_logger_output_construct(struct trans_logger_output *output)
{
static int index = 0;
int i;
for (i = 0; i < TRANS_HASH_MAX; i++) {
struct hash_anchor *start = &output->hash_table[i];
@ -969,13 +1056,6 @@ static int trans_logger_output_construct(struct trans_logger_output *output)
q_init(&output->q_phase2);
q_init(&output->q_phase3);
q_init(&output->q_phase4);
output->thread = kthread_create(trans_logger_thread, output, "mars_logger%d", index++);
if (IS_ERR(output->thread)) {
int error = PTR_ERR(output->thread);
MARS_ERR("cannot create thread, status=%d\n", error);
return error;
}
wake_up_process(output->thread);
return 0;
}
@ -987,6 +1067,7 @@ static int trans_logger_input_construct(struct trans_logger_input *input)
///////////////////////// static structs ////////////////////////
static struct trans_logger_brick_ops trans_logger_brick_ops = {
.brick_switch = trans_logger_switch,
};
static struct trans_logger_output_ops trans_logger_output_ops = {
@ -1040,13 +1121,13 @@ EXPORT_SYMBOL_GPL(trans_logger_brick_type);
static int __init init_trans_logger(void)
{
printk(MARS_INFO "init_trans_logger()\n");
MARS_INF("init_trans_logger()\n");
return trans_logger_register_brick_type();
}
static void __exit exit_trans_logger(void)
{
printk(MARS_INFO "exit_trans_logger()\n");
MARS_INF("exit_trans_logger()\n");
trans_logger_unregister_brick_type();
}

View File

@ -54,8 +54,14 @@ struct trans_logger_brick {
MARS_BRICK(trans_logger);
struct log_status logst;
// parameters
bool log_reads;
int limit_congest; // limit phase1 congestion.
int sequence; // logfile sequence number
int limit_congest;// limit phase1 congestion.
bool do_replay; // mode of operation
bool log_reads; // additionally log pre-images
loff_t start_pos; // where to start replay
loff_t end_pos; // end of replay
// readonly from outside
loff_t current_pos; // current replay position
};
struct trans_logger_output {

279
userspace/marsadm Normal file
View File

@ -0,0 +1,279 @@
#!/usr/bin/perl -w
# (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG
use strict;
use English;
use warnings;
# Base directory under which this script keeps its directories and symlinks.
my $mars = "/mars";
# Local node name as printed by `uname -n`; the trailing newline is removed below.
my $host = `uname -n` or die "cannot determine my network node name\n";
chomp $host;
# Raw output of `ip a`; the substitution below reduces it to one IPv4 address.
my $ip = `ip a` or die "cannot determine my IP address\n";
# Keep only a dotted quad that follows "inet " and does not start with "127.0.".
# The leading `.*` is greedy and /s lets it span lines, so when several
# addresses qualify, the last one in the output is kept.
$ip =~ s/\A.*inet +(?!127\.0\.)([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+).*\Z/$1/ms or die "cannot parse my IP address\n";
##################################################################
# syntactic checks
# Validate an identifier given on the command line.
#
# An identifier must start with a letter or underscore, continue with
# letters, digits or underscores, and be at most 16 characters long.
# Dies with a descriptive message otherwise; returns nothing.
sub check_id {
    my $str = shift;
    # \A...\z rather than ^...$: `$` would also match before a trailing
    # newline, letting "name\n" through into the path names built later.
    die "identifier '$str' has disallowed characters" unless $str =~ m/\A[A-Za-z_][A-Za-z0-9_]*\z/;
    die "identifier '$str' is too long (only 16 chars allowed)" if length($str) > 16;
    return;
}
##################################################################
# semantic checks
# Die unless a directory for the given resource exists.
# $res is used verbatim as the directory name directly below $mars.
sub check_res {
    my ($res) = @_;
    my $dir = "$mars/$res";
    if (!-d $dir) {
        die "resource '$res' does not exist\n";
    }
}
# Parse a size argument such as "4096", "10g" or "1.5M".
#
# Returns the size in bytes, using binary multipliers for the optional
# case-insensitive suffix k/m/g/t/p. Returns -1 when the argument is not
# a size at all (callers then treat it as a device path).
#
# Unlike a loose character-class check, the number must be well-formed
# (digits with at most one decimal point), and the global $_ is left alone.
sub check_size {
    my $arg = shift;
    my %multiplier_of = (
        ''  => 1,
        'k' => 1024,
        'm' => 1024 * 1024,
        'g' => 1024 * 1024 * 1024,
        't' => 1024 * 1024 * 1024 * 1024,
        'p' => 1024 * 1024 * 1024 * 1024 * 1024,
    );
    return -1 unless defined $arg;
    # The lookahead demands at least one digit, so "." or "k" are rejected.
    return -1 unless $arg =~ m/\A(?=\.?[0-9])([0-9]*\.?[0-9]*)([kmgtp]?)\z/i;
    my ($number, $suffix) = ($1, lc $2);
    return $number * $multiplier_of{$suffix};
}
##################################################################
# commands
# Handler for commands that are accepted but do nothing:
# prints a notice and terminates the script successfully.
sub ignore_cmd {
    my $cmd = shift;
    my $res = shift;
    my $notice = "ignoring command '$cmd' on resource '$res'\n";
    print $notice;
    exit 0;
}
# Handler for commands that have no meaning here:
# prints a notice and terminates the script successfully.
sub senseless_cmd {
    my $cmd = shift;
    my $notice = "command '$cmd' makes no sense with MARS (ignoring)\n";
    print $notice;
    exit 0;
}
# Handler for commands that are refused outright: always dies.
sub forbidden_cmd {
    my $cmd = shift;
    my $reason = "command '$cmd' cannot be used with MARS (it is impossible to carry out uniquely and could therefore lead to a disaster)\n";
    die $reason;
}
# Handler for commands that are planned but not implemented: always dies.
sub nyi_cmd {
    my $cmd = shift;
    my $reason = "command '$cmd' is not yet implemented\n";
    die $reason;
}
# join-system: copy the peer's "$mars/ips" directory via rsync and
# register the local IP address as "$mars/ips/ip-$host".
#
#   $cmd   - command name (unused)
#   $peer  - host to rsync from
#   $force - must contain "--force" when local resources already exist
#
# Dies when resources exist and --force is missing, or when rsync fails.
sub join_system {
    my ($cmd, $peer, $force) = @_;
    # List context: scalar-context glob is a stateful iterator and yields
    # undef when nothing matches, which -e would warn about under -w.
    my @existing = glob("$mars/resource-*");
    if (@existing) {
        die "Sorry, some resources already exist!\nThis is dangerous!\nIf you are sure that no resource clash is possible, re-invoke this command with '--force' option\n" unless ($force and $force =~ m/--force/);
    }
    print "joining system via rsync (peer='$peer')\n";
    # List-form system() bypasses the shell, so no argument is re-parsed.
    system("mkdir", $mars) unless -d $mars;
    system("mkdir", "$mars/ips") unless -d "$mars/ips";
    system("rsync", "--recursive", "--links", "-v", "$peer:$mars/ips/", "$mars/ips/") == 0 or die "oops\n";
    # Result not checked: the symlink may already exist from an earlier run.
    symlink($ip, "$mars/ips/ip-$host");
}
# create-resource / join-resource.
#
#   $cmd    - "create-resource" creates a new resource with this host as
#             primary; any other value joins an existing resource
#   $res    - resource name; the directory is "$mars/resource-$res"
#   $dev    - either an absolute block device path, or a size accepted by
#             check_size(), in which case a sparse file is created
#   $appear - optional name for the local device symlink (device-$host)
#
# A new resource is assembled in "$mars/.tmp.$res" and renamed into place
# at the end; joining works directly inside the existing directory.
# Dies on any failed precondition or filesystem operation.
sub create_res {
    my ($cmd, $res, $dev, $appear) = @_;
    my $create = ($cmd eq "create-resource");
    my $final = "$mars/resource-$res";

    die "undefined device or size argument\n" unless $dev;
    # Only creation requires the resource to be absent; joining requires
    # the opposite and checks for it further down.
    die "resource '$res' already exists\n" if $create and -d $final;
    check_id($appear) if $appear;

    if ($create) {
        print "creating new resource '$res'\n";
    } else {
        print "joining to existing resource '$res'\n";
    }

    my $size = check_size($dev);
    if ($size > 0) {
        $dev = "";
    } else {
        die "block device '$dev' does not exist\n" unless -b $dev;
        die "block device '$dev' must be an absolute path starting with '/'\n" unless $dev =~ m/^\//;
    }

    my $tmp = "$mars/.tmp.$res";
    my $primary;
    if ($create) {
        system("mkdir", $mars) unless -d $mars;
        system("mkdir", "$mars/ips") unless -d "$mars/ips";
        # Result not checked: the symlink may already exist.
        symlink($ip, "$mars/ips/ip-$host");
        system("rm", "-rf", $tmp);
        system("mkdir", $tmp) == 0 or die "could not create resource '$res'\n";
    } else {
        # Switch to the real resource directory first: the primary symlink
        # lives there, not in the temporary directory.
        $tmp = $final;
        die "resource '$res' does not exist\n" unless -d $tmp;
        $primary = readlink("$tmp/primary") or die "cannot determine primary\n";
        die "resource '$res' is already joined\n" if -e "$tmp/data-$host";
        die "my ip '$ip' is not registered -- please run 'join-system' first\n" unless -l "$mars/ips/ip-$host";
    }

    my $file = "$tmp/data-$host";
    if ($size > 0) {
        print "creating sparse file '$file' with size $size\n";
        open(my $out, '>', $file) or die "could not open '$file'\n";
        use Fcntl 'SEEK_SET';
        # Writing one NUL byte at offset size-1 gives the file its length
        # without allocating the space in between.
        sysseek($out, $size-1, SEEK_SET) == $size-1 or die "could not seek\n";
        syswrite($out, "\0", 1) == 1 or die "cannot init sparse file\n";
        close($out) or die "cannot init sparse file\n";
    } else {
        print "using existing device '$dev'\n";
        symlink($dev, $file) or die "cannot create device symlink\n";
    }

    if ($appear) {
        # TODO: check for uniqueness of $appear
        print "resource '$res' will appear as local device '/dev/mars/$appear'\n";
        unlink("$tmp/device-$host");
        symlink($appear, "$tmp/device-$host") or die "cannot create symlink for local device appearance\n";
    }

    if ($create) {
        symlink($host, "$tmp/primary") or die "cannot create primary symlink\n";
        symlink("log-000000001-$host,0", "$tmp/replay-$host") or die "cannot create replay status\n";
        rename($tmp, $final) or die "cannot finalize resource '$res'\n";
        print "successfully created resource '$res'\n";
    } else {
        unlink("$tmp/syncstatus-$host");
        symlink("0", "$tmp/syncstatus-$host") or die "cannot start initial sync\n";
        unlink("$tmp/connect-$host");
        symlink($primary, "$tmp/connect-$host") or die "cannot create peer symlink\n";
        # Give the primary a connection back to us unless it already has
        # one, active or switched off.
        symlink($host, "$tmp/connect-$primary") unless ( -l "$tmp/connect-$primary" or -l "$tmp/off.connect-$primary" );
        print "successfully joined resource '$res'\n";
    }
}
# attach / detach: toggle the local data symlink between its active name
# (data-$host) and its switched-off name (off.data-$host).
# Dies when the symlink is not in the expected state or the rename fails.
sub attach_res {
    my ($cmd, $res) = @_;
    my $active   = "$mars/$res/data-$host";
    my $inactive = "$mars/$res/off.data-$host";
    if ($cmd eq "detach") {
        -l $active or die "resource '$res' is not attached\n";
        rename($active, $inactive) or die "operation failed\n";
    } else {
        -l $inactive or die "resource '$res' is not detached\n";
        rename($inactive, $active) or die "operation failed\n";
    }
}
# connect / disconnect: toggle the local connect symlink between
# connect-$host and off.connect-$host.
# The rename result is not checked, so a missing symlink is not an error.
sub connect_res {
    my ($cmd, $res) = @_;
    my ($from, $to) = ($cmd eq "disconnect") ? ("", "off.") : ("off.", "");
    rename("$mars/$res/${from}connect-$host", "$mars/$res/${to}connect-$host");
}
# up / down: combine attach+connect, or disconnect+detach in that order.
sub up_res {
    my ($cmd, $res) = @_;
    my @steps = ($cmd eq "down")
        ? ([\&connect_res, "disconnect"], [\&attach_res, "detach"])
        : ([\&attach_res, "attach"], [\&connect_res, "connect"]);
    for my $step (@steps) {
        my ($handler, $subcmd) = @{$step};
        $handler->($subcmd, $res);
    }
}
# primary / secondary: change the "primary" symlink of a resource.
#
# "primary" makes this host the primary (no-op with exit 0 when it
# already is). "secondary" is only allowed when this host currently is
# primary and sets the primary to "(none)".
# The new symlink is created under a temporary name and renamed over the
# old one. Dies when the current primary cannot be read or a step fails.
sub primary_res {
    my ($cmd, $res) = @_;
    my $pri = "$mars/$res/primary";
    my $old = readlink($pri) or die "cannot determine current primary\n";
    # Work on a copy: the file-level $host must keep the real node name.
    my $new = $host;
    if ($cmd eq "secondary") {
        die "for safety reasons, switching to secondary is only allowed when I am primary\n" if($old ne $host);
        $new = "(none)";
    } elsif ($old eq $host) {
        print "I am already primary.\n";
        exit(0);
    }
    # TODO: check whether we can switch without interrupting service....
    my $tmp = "$mars/$res/.tmp.primary";
    # Remove a leftover from an interrupted run; a missing file is fine.
    unlink($tmp);
    symlink($new, $tmp) or die "cannot create new primary symlink\n";
    rename($tmp, $pri) or die "cannot install new primary symlink\n";
    print "primary changed from '$old' to '$new'\n";
}
# role / state: print "primary" when the resource's primary symlink names
# this host, "secondary" otherwise. Dies when the symlink cannot be read.
sub role_cmd {
    my ($cmd, $res) = @_;
    my $current = readlink("$mars/$res/primary") or die "cannot determine current primary\n";
    print(($current eq $host) ? "primary\n" : "secondary\n");
}
##################################################################
# Dispatch table: command name => handler.
# Written as one row per handler followed by the commands it serves;
# the map below expands the rows into the flat hash.
my %cmd_table = map {
    my ($handler, @names) = @{$_};
    map +( $_ => $handler ), @names;
} (
    # new keywords
    [ \&join_system,   qw(join-system) ],
    [ \&create_res,    qw(create-resource join-resource) ],
    # compatible keywords
    [ \&attach_res,    qw(attach detach) ],
    [ \&connect_res,   qw(connect disconnect) ],
    [ \&up_res,        qw(up down) ],
    [ \&primary_res,   qw(primary secondary) ],
    [ \&role_cmd,      qw(role state) ],
    [ \&ignore_cmd,    qw(syncer get-gi show-gi outdate hidden-commands) ],
    [ \&senseless_cmd, qw(create-md dump-md adjust dump new-current-uuid) ],
    [ \&forbidden_cmd, qw(invalidate-remote) ],
    [ \&nyi_cmd,       qw(invalidate resize wait-connect cstate status
                          verify pause-sync resume-sync dstate) ],
);
# Entry point: marsadm <command> <resource> [further arguments...]
my $cmd = shift || die "command argument is missing\n";
my $res = shift || die "resource argument is missing\n";
die "unknown command '$cmd'\n" if !exists $cmd_table{$cmd};
check_id($res);
# join-system and create_res (create-resource / join-resource) take the bare
# name and do their own existence checks. Resources live in
# "$mars/resource-<name>", and check_id() forbids '-', so every other command
# is given the directory name here instead of the bare name.
unless ($cmd =~ m/\A(?:join-system|create-resource|join-resource)\z/) {
    $res = "resource-$res";
    check_res($res);
}
my $func = $cmd_table{$cmd};
$func->($cmd, $res, @ARGV);