// (c) 2010 Thomas Schoebel-Theuer / 1&1 Internet AG //#define BRICK_DEBUGGING //#define MARS_DEBUGGING #include #include #include #include #include #include #include #define _STRATEGY #include "mars.h" #include "mars_client.h" #include #include #include #define SKIP_BIO false ///////////////////////////////////////////////////////////////////////// // MARS-specific memory allocation #define USE_KERNEL_PAGES #define MARS_MAX_ORDER 8 //#define USE_OFFSET //#define USE_INTERNAL_FREELIST #ifdef USE_INTERNAL_FREELIST void *mars_freelist[MARS_MAX_ORDER+1] = {}; atomic_t freelist_count[MARS_MAX_ORDER+1] = {}; #endif void *mars_alloc(loff_t pos, int len) { int offset = 0; void *data; #ifdef USE_KERNEL_PAGES int order = MARS_MAX_ORDER; if (unlikely(len > (PAGE_SIZE << order) || len <=0)) { MARS_ERR("trying to allocate %d bytes (max = %d)\n", len, (PAGE_SIZE << order)); return NULL; } #endif #ifdef USE_OFFSET offset = pos & (PAGE_SIZE-1); #endif #ifdef USE_KERNEL_PAGES len += offset; while (order > 0 && (PAGE_SIZE << (order-1)) >= len) { order--; } #ifdef USE_INTERNAL_FREELIST data = mars_freelist[order]; if (data) { mars_freelist[order] = *(void**)data; atomic_dec(&freelist_count[order]); } else #endif data = (void*)__get_free_pages(GFP_MARS, order); #else data = __vmalloc(len + offset, GFP_MARS, PAGE_KERNEL_IO); #endif if (likely(data)) { data += offset; } return data; } EXPORT_SYMBOL_GPL(mars_alloc); void mars_free(void *data, int len) { int offset = 0; #ifdef USE_KERNEL_PAGES int order = MARS_MAX_ORDER; #endif if (!data) { return; } #ifdef USE_OFFSET offset = ((unsigned long)data) & (PAGE_SIZE-1); #endif data -= offset; #ifdef USE_KERNEL_PAGES len += offset; while (order > 0 && (PAGE_SIZE << (order-1)) >= len) { order--; } #ifdef USE_INTERNAL_FREELIST if (order > 0 && atomic_read(&freelist_count[order]) < 500) { static int max[MARS_MAX_ORDER+1] = {}; int now; *(void**)data = mars_freelist[order]; mars_freelist[order] = data; atomic_inc(&freelist_count[order]); now = atomic_read(&freelist_count[order]); if (now > max[order] + 50) { int i; max[order] = now; MARS_INF("now %d freelist members at order %d (len = %d)\n", now, order, len); for (i = 0; i <= MARS_MAX_ORDER; i++) { MARS_INF(" %d : %4d\n", i, atomic_read(&freelist_count[i])); } } } else #endif __free_pages(virt_to_page((unsigned long)data), order); #else vfree(data); #endif } EXPORT_SYMBOL_GPL(mars_free); struct page *mars_iomap(void *data, int *offset, int *len) { int _offset = ((unsigned long)data) & (PAGE_SIZE-1); struct page *page; *offset = _offset; if (*len > PAGE_SIZE - _offset) { *len = PAGE_SIZE - _offset; } if (is_vmalloc_addr(data)) { page = vmalloc_to_page(data); } else { page = virt_to_page(data); } return page; } EXPORT_SYMBOL_GPL(mars_iomap); ///////////////////////////////////////////////////////////////////// // meta descriptions const struct meta mars_info_meta[] = { META_INI(current_size, struct mars_info, FIELD_INT), META_INI(transfer_order, struct mars_info, FIELD_INT), META_INI(transfer_size, struct mars_info, FIELD_INT), {} }; EXPORT_SYMBOL_GPL(mars_info_meta); const struct meta mars_mref_meta[] = { META_INI(ref_pos, struct mref_object, FIELD_INT), META_INI(ref_len, struct mref_object, FIELD_INT), META_INI(ref_may_write, struct mref_object, FIELD_INT), META_INI(ref_prio, struct mref_object, FIELD_INT), META_INI(ref_timeout, struct mref_object, FIELD_INT), META_INI(ref_total_size, struct mref_object, FIELD_INT), META_INI(ref_flags, struct mref_object, FIELD_INT), META_INI(ref_rw, struct mref_object, FIELD_INT), META_INI(ref_id, struct mref_object, FIELD_INT), META_INI(ref_skip_sync, struct mref_object, FIELD_INT), META_INI(_ref_cb.cb_error, struct mref_object, FIELD_INT), {} }; EXPORT_SYMBOL_GPL(mars_mref_meta); const struct meta mars_timespec_meta[] = { META_INI(tv_sec, struct timespec, FIELD_INT), META_INI(tv_nsec, struct timespec, FIELD_INT), {} }; EXPORT_SYMBOL_GPL(mars_timespec_meta); const struct meta mars_kstat_meta[] = { META_INI(ino, struct kstat, FIELD_INT), META_INI(mode, struct kstat, FIELD_INT), META_INI(size, struct kstat, FIELD_INT), META_INI_SUB(atime, struct kstat, mars_timespec_meta), META_INI_SUB(mtime, struct kstat, mars_timespec_meta), META_INI_SUB(ctime, struct kstat, mars_timespec_meta), META_INI(blksize, struct kstat, FIELD_INT), {} }; EXPORT_SYMBOL_GPL(mars_kstat_meta); const struct meta mars_dent_meta[] = { META_INI(d_name, struct mars_dent, FIELD_STRING), META_INI(d_rest, struct mars_dent, FIELD_STRING), META_INI(d_path, struct mars_dent, FIELD_STRING), META_INI(d_namelen, struct mars_dent, FIELD_INT), META_INI(d_pathlen, struct mars_dent, FIELD_INT), META_INI(d_type, struct mars_dent, FIELD_INT), META_INI(d_class, struct mars_dent, FIELD_INT), META_INI(d_serial, struct mars_dent, FIELD_INT), META_INI_SUB(new_stat,struct mars_dent, mars_kstat_meta), META_INI_SUB(old_stat,struct mars_dent, mars_kstat_meta), META_INI(new_link, struct mars_dent, FIELD_STRING), META_INI(old_link, struct mars_dent, FIELD_STRING), META_INI(d_args, struct mars_dent, FIELD_STRING), META_INI(d_argv[0], struct mars_dent, FIELD_STRING), META_INI(d_argv[1], struct mars_dent, FIELD_STRING), META_INI(d_argv[2], struct mars_dent, FIELD_STRING), META_INI(d_argv[3], struct mars_dent, FIELD_STRING), {} }; EXPORT_SYMBOL_GPL(mars_dent_meta); ///////////////////////////////////////////////////////////////////// // tracing #ifdef MARS_TRACING unsigned long long start_trace_clock = 0; EXPORT_SYMBOL_GPL(start_trace_clock); struct file *mars_log_file = NULL; loff_t mars_log_pos = 0; void _mars_log(char *buf, int len) { static DECLARE_MUTEX(trace_lock); mm_segment_t oldfs; oldfs = get_fs(); set_fs(get_ds()); down(&trace_lock); vfs_write(mars_log_file, buf, len, &mars_log_pos); up(&trace_lock); set_fs(oldfs); } EXPORT_SYMBOL_GPL(_mars_log); void mars_log(const char *fmt, ...) { char *buf = kmalloc(PAGE_SIZE, GFP_MARS); va_list args; int len; if (!buf) return; va_start(args, fmt); len = vsnprintf(buf, PAGE_SIZE, fmt, args); va_end(args); _mars_log(buf, len); kfree(buf); } EXPORT_SYMBOL_GPL(mars_log); void mars_trace(struct mref_object *mref, const char *info) { int index = mref->ref_traces; if (likely(index < MAX_TRACES)) { mref->ref_trace_stamp[index] = cpu_clock(raw_smp_processor_id()); mref->ref_trace_info[index] = info; mref->ref_traces++; } } EXPORT_SYMBOL_GPL(mars_trace); void mars_log_trace(struct mref_object *mref) { char *buf = kmalloc(PAGE_SIZE, GFP_MARS); unsigned long long old; unsigned long long diff; int i; int len; if (!buf) { return; } if (!mars_log_file || !mref->ref_traces) { goto done; } if (!start_trace_clock) { start_trace_clock = mref->ref_trace_stamp[0]; } diff = mref->ref_trace_stamp[mref->ref_traces-1] - mref->ref_trace_stamp[0]; len = snprintf(buf, PAGE_SIZE, "%c ;%12lld ;%6d;%10llu", mref->ref_rw ? 'W' : 'R', mref->ref_pos, mref->ref_len, diff / 1000); old = start_trace_clock; for (i = 0; i < mref->ref_traces; i++) { diff = mref->ref_trace_stamp[i] - old; len += snprintf(buf + len, PAGE_SIZE - len, " ; %s ;%10llu", mref->ref_trace_info[i], diff / 1000); old = mref->ref_trace_stamp[i]; } len +=snprintf(buf + len, PAGE_SIZE - len, "\n"); _mars_log(buf, len); done: kfree(buf); mref->ref_traces = 0; } EXPORT_SYMBOL_GPL(mars_log_trace); #endif // MARS_TRACING ///////////////////////////////////////////////////////////////////// // some helpers int mars_stat(const char *path, struct kstat *stat, bool use_lstat) { mm_segment_t oldfs; int status; oldfs = get_fs(); set_fs(get_ds()); if (use_lstat) { status = vfs_lstat((char*)path, stat); } else { status = vfs_stat((char*)path, stat); } set_fs(oldfs); return status; } EXPORT_SYMBOL_GPL(mars_stat); int mars_mkdir(const char *path) { mm_segment_t oldfs; int status; oldfs = get_fs(); set_fs(get_ds()); status = sys_mkdir(path, 0700); set_fs(oldfs); return status; } EXPORT_SYMBOL_GPL(mars_mkdir); int mars_symlink(const char *oldpath, const char *newpath, const struct timespec *stamp, uid_t uid) { char *tmp = backskip_replace(newpath, '/', true, "/.tmp-"); mm_segment_t oldfs; int status = -ENOMEM; if (unlikely(!tmp)) goto done; oldfs = get_fs(); set_fs(get_ds()); (void)sys_unlink(tmp); status = sys_symlink(oldpath, tmp); if (stamp) { struct timespec times[2]; sys_lchown(tmp, uid, 0); memcpy(×[0], stamp, sizeof(struct timespec)); memcpy(×[1], stamp, sizeof(struct timespec)); status = do_utimes(AT_FDCWD, tmp, times, AT_SYMLINK_NOFOLLOW); } if (status >= 0) { status = mars_rename(tmp, newpath); } set_fs(oldfs); kfree(tmp); done: return status; } EXPORT_SYMBOL_GPL(mars_symlink); int mars_rename(const char *oldpath, const char *newpath) { mm_segment_t oldfs; int status; oldfs = get_fs(); set_fs(get_ds()); status = sys_rename(oldpath, newpath); set_fs(oldfs); return status; } EXPORT_SYMBOL_GPL(mars_rename); int mars_chmod(const char *path, mode_t mode) { mm_segment_t oldfs; int status; oldfs = get_fs(); set_fs(get_ds()); status = sys_chmod(path, mode); set_fs(oldfs); return status; } EXPORT_SYMBOL_GPL(mars_chmod); int mars_lchown(const char *path, uid_t uid) { mm_segment_t oldfs; int status; oldfs = get_fs(); set_fs(get_ds()); status = sys_lchown(path, uid, 0); set_fs(oldfs); return status; } EXPORT_SYMBOL_GPL(mars_lchown); #include struct crypto_hash *mars_tfm = NULL; int mars_digest_size = 0; EXPORT_SYMBOL_GPL(mars_digest_size); void mars_digest(void *digest, void *data, int len) { struct hash_desc desc = { .tfm = mars_tfm, .flags = 0, }; struct scatterlist sg; memset(digest, 0, mars_digest_size); crypto_hash_init(&desc); sg_init_table(&sg, 1); sg_set_buf(&sg, data, len); crypto_hash_update(&desc, &sg, sg.length); crypto_hash_final(&desc, digest); } EXPORT_SYMBOL_GPL(mars_digest); ////////////////////////////////////////////////////////////// // object stuff const struct generic_object_type mref_type = { .object_type_name = "mref", .default_size = sizeof(struct mref_object), .brick_obj_nr = BRICK_OBJ_MREF, }; EXPORT_SYMBOL_GPL(mref_type); ////////////////////////////////////////////////////////////// // brick stuff ////////////////////////////////////////////////////////////// // infrastructure static char *id = NULL; /* TODO: better use MAC addresses (or motherboard IDs where available). * Or, at least, some checks for MAC addresses should be recorded / added. * When the nodename is misconfigured, data might be scrambled. * MAC addresses should be more secure. * In ideal case, further checks should be added to prohibit accidental * name clashes. */ char *my_id(void) { struct new_utsname *u; if (id) return id; //down_read(&uts_sem); // FIXME: this is currenty not EXPORTed from the kernel! u = utsname(); if (u) { id = kstrdup(u->nodename, GFP_MARS); } //up_read(&uts_sem); return id; } EXPORT_SYMBOL_GPL(my_id); struct mars_global *mars_global = NULL; EXPORT_SYMBOL_GPL(mars_global); void _mars_trigger(void) { if (mars_global) { mars_global->main_trigger = true; wake_up_interruptible(&mars_global->main_event); } } EXPORT_SYMBOL_GPL(_mars_trigger); int mars_power_button(struct mars_brick *brick, bool val, bool force_off) { int status = 0; bool oldval = brick->power.button; if (force_off && !val) brick->power.force_off = true; if (brick->power.force_off) val = false; if (val != oldval) { MARS_DBG("brick '%s' type '%s' power button %d -> %d\n", brick->brick_path, brick->type->type_name, oldval, val); set_button(&brick->power, val, false); if (brick->ops) status = brick->ops->brick_switch(brick); mars_trigger(); } return status; } EXPORT_SYMBOL_GPL(mars_power_button); int mars_power_button_recursive(struct mars_brick *brick, bool val, bool force_off, int timeout) { int status = 0; bool oldval = brick->power.button; if (force_off && !val) brick->power.force_off = true; if (brick->power.force_off) val = false; if (val != oldval) { brick_switch_t mode; mode = (val ? BR_ON_ALL : (force_off ? BR_FREE_ALL : BR_OFF_ALL)); MARS_DBG("brick '%s' type '%s' power button %d -> %d (mode = %d)\n", brick->brick_path, brick->type->type_name, oldval, val, mode); status = set_recursive_button((void*)brick, mode, timeout); } return status; } EXPORT_SYMBOL_GPL(mars_power_button_recursive); void mars_power_led_on(struct mars_brick *brick, bool val) { bool oldval = brick->power.led_on; if (val != oldval) { MARS_DBG("brick '%s' type '%s' led_on %d -> %d\n", brick->brick_path, brick->type->type_name, oldval, val); set_led_on(&brick->power, val); mars_trigger(); } } EXPORT_SYMBOL_GPL(mars_power_led_on); void mars_power_led_off(struct mars_brick *brick, bool val) { bool oldval = brick->power.led_off; if (val != oldval) { MARS_DBG("brick '%s' type '%s' led_off %d -> %d\n", brick->brick_path, brick->type->type_name, oldval, val); set_led_off(&brick->power, val); mars_trigger(); } } EXPORT_SYMBOL_GPL(mars_power_led_off); ///////////////////////////////////////////////////////////////////// // strategy layer struct mars_cookie { struct mars_global *global; mars_dent_checker checker; char *path; struct mars_dent *parent; int pathlen; int allocsize; int depth; }; static int get_inode(char *newpath, struct mars_dent *dent) { mm_segment_t oldfs; int status; struct kstat tmp = {}; oldfs = get_fs(); set_fs(get_ds()); status = vfs_lstat(newpath, &tmp); if (status < 0) { MARS_ERR("cannot stat '%s', status = %d\n", newpath, status); goto done; } memcpy(&dent->old_stat, &dent->new_stat, sizeof(dent->old_stat)); memcpy(&dent->new_stat, &tmp, sizeof(dent->new_stat)); if (S_ISLNK(dent->new_stat.mode)) { struct path path = {}; int len = dent->new_stat.size; struct inode *inode; char *link; if (unlikely(len <= 0)) { MARS_ERR("symlink '%s' bad len = %d\n", newpath, len); status = -EINVAL; goto done; } status = user_path_at(AT_FDCWD, newpath, 0, &path); if (unlikely(status < 0)) { MARS_ERR("cannot read link '%s'\n", newpath); goto done; } inode = path.dentry->d_inode; status = -ENOMEM; link = kmalloc(len + 2, GFP_MARS); if (likely(link)) { MARS_IO("len = %d\n", len); status = inode->i_op->readlink(path.dentry, link, len + 1); link[len] = '\0'; if (status < 0 || (dent->new_link && !strncmp(dent->new_link, link, len))) { //MARS_IO("symlink no change '%s' -> '%s' (%s) status = %d\n", newpath, link, dent->new_link ? dent->new_link : "", status); kfree(link); } else { MARS_IO("symlink '%s' -> '%s' (%s) status = %d\n", newpath, link, dent->new_link ? dent->new_link : "", status); if (dent->old_link) kfree(dent->old_link); dent->old_link = dent->new_link; dent->new_link = link; } } path_put(&path); } if (dent->new_link) MARS_IO("symlink '%s'\n", dent->new_link); done: set_fs(oldfs); return status; } static int mars_filler(void *__buf, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct mars_cookie *cookie = __buf; struct mars_global *global = cookie->global; struct list_head *anchor = &global->dent_anchor; struct mars_dent *dent; struct list_head *tmp; struct mars_dent *best = NULL; char *newpath; int prefix = 0; int pathlen; int class; int serial = 0; MARS_IO("ino = %llu len = %d offset = %lld type = %u\n", ino, namlen, offset, d_type); if (name[0] == '.') { return 0; } class = cookie->checker(cookie->parent, name, namlen, d_type, &prefix, &serial); if (class < 0) return 0; pathlen = cookie->pathlen; newpath = kmalloc(pathlen + namlen + 2, GFP_MARS); if (unlikely(!newpath)) goto err_mem0; memcpy(newpath, cookie->path, pathlen); newpath[pathlen++] = '/'; memcpy(newpath + pathlen, name, namlen); pathlen += namlen; newpath[pathlen] = '\0'; MARS_IO("path = '%s'\n", newpath); for (tmp = anchor->next; tmp != anchor; tmp = tmp->next) { int cmp; dent = container_of(tmp, struct mars_dent, dent_link); cmp = strcmp(dent->d_path, newpath); if (!cmp) { goto found; } // keep the list sorted. find the next smallest member. if ((dent->d_class < class || (dent->d_class == class && (dent->d_serial < serial || (dent->d_serial == serial && cmp < 0)))) && (!best || best->d_class < dent->d_class || (best->d_class == dent->d_class && (best->d_serial < dent->d_serial || (best->d_serial == dent->d_serial && strcmp(best->d_path, dent->d_path) < 0))))) { best = dent; } } dent = kzalloc(cookie->allocsize, GFP_MARS); if (unlikely(!dent)) goto err_mem1; dent->d_name = kmalloc(namlen + 1, GFP_MARS); if (unlikely(!dent->d_name)) goto err_mem2; memcpy(dent->d_name, name, namlen); dent->d_name[namlen] = '\0'; dent->d_namelen = namlen; dent->d_rest = dent->d_name + prefix; dent->d_path = newpath; newpath = NULL; dent->d_pathlen = pathlen; INIT_LIST_HEAD(&dent->brick_list); if (best) { list_add(&dent->dent_link, &best->dent_link); } else { list_add_tail(&dent->dent_link, anchor); } found: dent->d_type = d_type; dent->d_class = class; dent->d_serial = serial; dent->d_parent = cookie->parent; dent->d_depth = cookie->depth; dent->d_global = global; dent->d_killme = false; if (newpath) kfree(newpath); return 0; err_mem2: kfree(dent); err_mem1: kfree(newpath); err_mem0: return -ENOMEM; } static int _mars_readdir(struct mars_cookie *cookie) { struct file *f; mm_segment_t oldfs; int status = 0; oldfs = get_fs(); set_fs(get_ds()); f = filp_open(cookie->path, O_DIRECTORY | O_RDONLY, 0); set_fs(oldfs); if (unlikely(IS_ERR(f))) { return PTR_ERR(f); } for (;;) { status = vfs_readdir(f, mars_filler, cookie); MARS_IO("vfs_readdir() status = %d\n", status); if (status <= 0) break; } filp_close(f, NULL); return status; } int mars_dent_work(struct mars_global *global, char *dirname, int allocsize, mars_dent_checker checker, mars_dent_worker worker, void *buf, int maxdepth) { static int version = 0; struct mars_cookie cookie = { .global = global, .checker = checker, .path = dirname, .pathlen = strlen(dirname), .allocsize = allocsize, .depth = 0, }; struct list_head *tmp; struct list_head *next; int rounds = 0; int status; int total_status = 0; bool found_dir; /* Initialize the flat dent list */ version++; total_status = _mars_readdir(&cookie); if (total_status || !worker) { goto done; } down_write(&global->dent_mutex); restart: found_dir = false; /* First, get all the inode information in a separate pass * before starting work. * The separate pass is necessary because some dents may * forward-reference other dents, and it would be a pity if * some inodes were not available or were outdated. */ for (tmp = global->dent_anchor.next; tmp != &global->dent_anchor; tmp = tmp->next) { struct mars_dent *dent = container_of(tmp, struct mars_dent, dent_link); // treat any member only once during this invocation if (dent->d_version == version) continue; dent->d_version = version; msleep(10); // yield MARS_IO("reading inode '%s'\n", dent->d_path); status = get_inode(dent->d_path, dent); total_status |= status; // recurse into subdirectories by inserting into the flat list if (S_ISDIR(dent->new_stat.mode) && dent->d_depth <= maxdepth) { struct mars_cookie sub_cookie = { .global = global, .checker = checker, .path = dent->d_path, .pathlen = dent->d_pathlen, .allocsize = allocsize, .parent = dent, .depth = dent->d_depth + 1, }; found_dir = true; status = _mars_readdir(&sub_cookie); total_status |= status; if (status < 0) { MARS_ERR("forward: status %d on '%s'\n", status, dent->d_path); } } } if (found_dir && ++rounds < 10) { goto restart; } /* Remove all dents marked for removal. */ for (tmp = global->dent_anchor.next, next = tmp->next; tmp != &global->dent_anchor; tmp = next, next = next->next) { struct mars_dent *dent = container_of(tmp, struct mars_dent, dent_link); if (!dent->d_killme) continue; MARS_DBG("killing dent '%s'\n", dent->d_path); list_del_init(tmp); //... FIXME memleak } up_write(&global->dent_mutex); /* Forward pass. */ down_read(&global->dent_mutex); for (tmp = global->dent_anchor.next, next = tmp->next; tmp != &global->dent_anchor; tmp = next, next = next->next) { struct mars_dent *dent = container_of(tmp, struct mars_dent, dent_link); msleep(10); // yield MARS_IO("forward treat '%s'\n", dent->d_path); status = worker(buf, dent, false); total_status |= status; if (status < 0) continue; if (status < 0) { MARS_ERR("backwards: status %d on '%s'\n", status, dent->d_path); } } /* Backward pass. */ for (tmp = global->dent_anchor.prev; tmp != &global->dent_anchor; tmp = tmp->prev) { struct mars_dent *dent = container_of(tmp, struct mars_dent, dent_link); msleep(10); // yield MARS_IO("backward treat '%s'\n", dent->d_path); status = worker(buf, dent, true); total_status |= status; if (status < 0) { MARS_ERR("backwards: status %d on '%s'\n", status, dent->d_path); } } up_read(&global->dent_mutex); done: return total_status; } EXPORT_SYMBOL_GPL(mars_dent_work); static struct mars_dent *_mars_find_dent(struct mars_global *global, const char *path) { struct mars_dent *res = NULL; struct list_head *tmp; if (!rwsem_is_locked(&global->dent_mutex)) { MARS_ERR("dent_mutex not held!\n"); } for (tmp = global->dent_anchor.next; tmp != &global->dent_anchor; tmp = tmp->next) { struct mars_dent *tmp_dent = container_of(tmp, struct mars_dent, dent_link); if (!strcmp(tmp_dent->d_path, path)) { res = tmp_dent; break; } } return res; } //EXPORT_SYMBOL_GPL(_mars_find_dent); struct mars_dent *mars_find_dent(struct mars_global *global, const char *path) { struct mars_dent *res; //down_read(&global->dent_mutex); res = _mars_find_dent(global, path); //up_read(&global->dent_mutex); return res; } EXPORT_SYMBOL_GPL(mars_find_dent); #if 0 // old code! does not work! incorrect locking / races! void mars_kill_dent(struct mars_dent *dent) { struct mars_global *global = dent->d_global; struct list_head *oldtmp = NULL; CHECK_PTR(global, done); down(&global->mutex); while (!list_empty(&dent->brick_list)) { struct list_head *tmp = dent->brick_list.next; struct mars_brick *brick = container_of(tmp, struct mars_brick, dent_brick_link); // just satisfy "defensive" programming style... if (unlikely(tmp == oldtmp)) { MARS_ERR("oops, something is nasty here\n"); list_del_init(tmp); continue; } oldtmp = tmp; // killing a brick may take a long time... up(&global->mutex); mars_kill_brick(brick); down(&global->mutex); } up(&global->mutex); done: ; } #else void mars_kill_dent(struct mars_dent *dent) { dent->d_killme = true; while (!list_empty(&dent->brick_list)) { struct list_head *tmp = dent->brick_list.next; struct mars_brick *brick = container_of(tmp, struct mars_brick, dent_brick_link); list_del_init(tmp); // note: locking is now done there.... mars_kill_brick(brick); } } #endif EXPORT_SYMBOL_GPL(mars_kill_dent); void mars_free_dent(struct mars_dent *dent) { int i; mars_kill_dent(dent); CHECK_HEAD_EMPTY(&dent->dent_link); CHECK_HEAD_EMPTY(&dent->brick_list); for (i = 0; i < MARS_ARGV_MAX; i++) { if (dent->d_argv[i]) kfree(dent->d_argv[i]); } if (dent->d_args) kfree(dent->d_args); if (dent->d_private) kfree(dent->d_private); if (dent->old_link) kfree(dent->old_link); if (dent->new_link) kfree(dent->new_link); kfree(dent->d_name); kfree(dent->d_path); kfree(dent); } EXPORT_SYMBOL_GPL(mars_free_dent); void mars_free_dent_all(struct list_head *anchor) { #if 0 // FIXME: locking while (!list_empty(anchor)) { struct mars_dent *dent; dent = container_of(anchor->prev, struct mars_dent, dent_link); mars_free_dent(dent); } #else // provisionary memleak list_del_init(anchor); #endif } EXPORT_SYMBOL_GPL(mars_free_dent_all); ///////////////////////////////////////////////////////////////////// // low-level brick instantiation struct mars_brick *mars_find_brick(struct mars_global *global, const void *brick_type, const char *path) { struct list_head *tmp; if (!global || !path) return NULL; down_read(&global->brick_mutex); for (tmp = global->brick_anchor.next; tmp != &global->brick_anchor; tmp = tmp->next) { struct mars_brick *test = container_of(tmp, struct mars_brick, global_brick_link); if (!strcmp(test->brick_path, path)) { up_read(&global->brick_mutex); if (brick_type && test->type != brick_type) { MARS_ERR("bad brick type\n"); return NULL; } return test; } } up_read(&global->brick_mutex); return NULL; } EXPORT_SYMBOL_GPL(mars_find_brick); int mars_free_brick(struct mars_brick *brick) { struct mars_global *global; int i; int status; if (!brick) { MARS_ERR("bad brick parameter\n"); status = -EINVAL; goto done; } if (!brick->power.force_off || !brick->power.led_off) { MARS_DBG("brick '%s' is not freeable\n", brick->brick_name); status = -ETXTBSY; goto done; } // first check whether the brick is in use somewhere for (i = 0; i < brick->nr_outputs; i++) { if (brick->outputs[i]->nr_connected > 0) { MARS_DBG("brick '%s' not freeable, output %i is used\n", brick->brick_name, i); status = -EEXIST; goto done; } } MARS_DBG("===> freeing brick name = '%s'\n", brick->brick_name); global = brick->global; if (global) { down_write(&global->brick_mutex); list_del_init(&brick->global_brick_link); list_del_init(&brick->dent_brick_link); up_write(&global->brick_mutex); } status = generic_brick_exit_full((void*)brick); if (status >= 0) { #ifndef MEMLEAK // TODO: check whether crash remains possible if (brick->brick_name) kfree(brick->brick_name); if (brick->brick_path) kfree(brick->brick_path); kfree(brick); #endif mars_trigger(); } else { MARS_ERR("error freeing brick, status = %d\n", status); } done: return status; } EXPORT_SYMBOL_GPL(mars_free_brick); struct mars_brick *mars_make_brick(struct mars_global *global, struct mars_dent *belongs, const void *_brick_type, const char *path, const char *_name) { const char *name = kstrdup(_name, GFP_MARS); const char *names[] = { name }; const struct generic_brick_type *brick_type = _brick_type; const struct generic_input_type **input_types; const struct generic_output_type **output_types; struct mars_brick *res; int size; int i; int status; if (!name) { MARS_ERR("cannot allocate space for name\n"); return NULL; } size = brick_type->brick_size + (brick_type->max_inputs + brick_type->max_outputs) * sizeof(void*); input_types = brick_type->default_input_types; for (i = 0; i < brick_type->max_inputs; i++) { const struct generic_input_type *type = *input_types++; if (unlikely(!type)) { MARS_ERR("input_type %d is missing\n", i); goto err_name; } if (unlikely(type->input_size <= 0)) { MARS_ERR("bad input_size at %d\n", i); goto err_name; } size += type->input_size; } output_types = brick_type->default_output_types; for (i = 0; i < brick_type->max_outputs; i++) { const struct generic_output_type *type = *output_types++; if (unlikely(!type)) { MARS_ERR("output_type %d is missing\n", i); goto err_name; } if (unlikely(type->output_size <= 0)) { MARS_ERR("bad output_size at %d\n", i); goto err_name; } size += type->output_size; } res = kzalloc(size, GFP_MARS); if (!res) { MARS_ERR("cannot grab %d bytes for brick type '%s'\n", size, brick_type->type_name); goto err_name; } res->global = global; INIT_LIST_HEAD(&res->dent_brick_link); res->brick_path = kstrdup(path, GFP_MARS); if (!res->brick_path) { MARS_ERR("cannot grab memory for path '%s'\n", path); goto err_res; } status = generic_brick_init_full(res, size, brick_type, NULL, NULL, names); MARS_DBG("brick '%s' init '%s' '%s' (status=%d)\n", brick_type->type_name, path, name, status); if (status < 0) { MARS_ERR("cannot init brick %s\n", brick_type->type_name); goto err_path; } res->free = mars_free_brick; /* Immediately make it visible, regardless of internal state. * Switching on / etc must be done separately. */ down_write(&global->brick_mutex); list_add(&res->global_brick_link, &global->brick_anchor); if (belongs) { list_add(&res->dent_brick_link, &belongs->brick_list); } up_write(&global->brick_mutex); return res; err_path: kfree(res->brick_path); err_res: kfree(res); err_name: kfree(name); return NULL; } EXPORT_SYMBOL_GPL(mars_make_brick); int mars_kill_brick(struct mars_brick *brick) { struct mars_global *global; int status = -EINVAL; CHECK_PTR(brick, done); global = brick->global; CHECK_PTR(global, done); MARS_DBG("===> killing brick path = '%s' name = '%s'\n", brick->brick_path, brick->brick_name); // start shutdown status = set_recursive_button((void*)brick, BR_FREE_ALL, 10 * HZ); done: return status; } EXPORT_SYMBOL_GPL(mars_kill_brick); ///////////////////////////////////////////////////////////////////// // mid-level brick instantiation (identity is based on path strings) char *vpath_make(const char *fmt, va_list *args) { int len = strlen(fmt); char *res = kmalloc(len + MARS_PATH_MAX, GFP_MARS); if (likely(res)) { vsnprintf(res, MARS_PATH_MAX, fmt, *args); } return res; } EXPORT_SYMBOL_GPL(vpath_make); char *path_make(const char *fmt, ...) { va_list args; char *res; va_start(args, fmt); res = vpath_make(fmt, &args); va_end(args); return res; } EXPORT_SYMBOL_GPL(path_make); char *backskip_replace(const char *path, char delim, bool insert, const char *fmt, ...) { int path_len = strlen(path); int total_len = strlen(fmt) + path_len + MARS_PATH_MAX; char *res = kmalloc(total_len, GFP_MARS); if (likely(res)) { va_list args; int pos = path_len; int plus; while (pos > 0 && path[pos] != '/') { pos--; } if (delim != '/') { while (pos < path_len && path[pos] != delim) { pos++; } } memcpy(res, path, pos); va_start(args, fmt); plus = vsnprintf(res + pos, total_len - pos, fmt, args); va_end(args); if (insert) { strncpy(res + pos + plus, path + pos + 1, total_len - pos - plus); } } return res; } EXPORT_SYMBOL_GPL(backskip_replace); struct mars_brick *path_find_brick(struct mars_global *global, const void *brick_type, const char *fmt, ...) { va_list args; char *fullpath; struct mars_brick *res; va_start(args, fmt); fullpath = vpath_make(fmt, &args); va_end(args); if (!fullpath) { return NULL; } res = mars_find_brick(global, brick_type, fullpath); kfree(fullpath); MARS_IO("search for '%s' found = %p\n", fullpath, res); return res; } EXPORT_SYMBOL_GPL(path_find_brick); const struct generic_brick_type *_client_brick_type = NULL; EXPORT_SYMBOL_GPL(_client_brick_type); const struct generic_brick_type *_bio_brick_type = NULL; EXPORT_SYMBOL_GPL(_bio_brick_type); const struct generic_brick_type *_aio_brick_type = NULL; EXPORT_SYMBOL_GPL(_aio_brick_type); struct mars_brick *make_brick_all( struct mars_global *global, struct mars_dent *belongs, void (*setup_fn)(struct mars_brick *brick, void *private), void *private, int timeout, const char *new_name, const struct generic_brick_type *new_brick_type, const struct generic_brick_type *prev_brick_type[], const char *switch_fmt, const char *new_fmt, const char *prev_fmt[], int prev_count, ... ) { va_list args; const char *switch_path = NULL; const char *new_path; const char *_new_path = NULL; struct mars_brick *brick = NULL; char *paths[prev_count]; struct mars_brick *prev[prev_count]; int switch_state = true; int i; // treat variable arguments va_start(args, prev_count); if (switch_fmt) { switch_state = false; if (switch_fmt[0]) { switch_path = vpath_make(switch_fmt, &args); } } if (new_fmt) { new_path = _new_path = vpath_make(new_fmt, &args); } else { new_path = new_name; } for (i = 0; i < prev_count; i++) { paths[i] = vpath_make(prev_fmt[i], &args); } va_end(args); if (!new_path) { MARS_ERR("could not create new path\n"); goto err; } if (switch_path) { struct mars_dent *test = mars_find_dent(global, switch_path); if (test && test->new_link) { sscanf(test->new_link, "%d", &switch_state); } } // brick already existing? brick = mars_find_brick(global, new_brick_type != _aio_brick_type && new_brick_type != _bio_brick_type ? new_brick_type : NULL, new_path); if (brick) { // just switch the power state MARS_DBG("found existing brick '%s'\n", new_path); goto do_switch; } if (!switch_state) { // don't start => also don't create MARS_DBG("no need for brick '%s'\n", new_path); goto done; } MARS_DBG("make new brick '%s'\n", new_path); if (!new_name) new_name = new_path; MARS_DBG("----> new brick type = '%s' path = '%s' name = '%s'\n", new_brick_type->type_name, new_path, new_name); // get all predecessor bricks for (i = 0; i < prev_count; i++) { char *path = paths[i]; if (!path) { MARS_ERR("could not build path %d\n", i); goto err; } prev[i] = mars_find_brick(global, prev_brick_type[i], path); if (!prev[i]) { MARS_ERR("prev brick '%s' does not exist\n", path); goto err; } MARS_DBG("------> predecessor %d path = '%s'\n", i, path); } // some generic brick replacements (better performance / network functionality) brick = NULL; if ((new_brick_type == _bio_brick_type || new_brick_type == _aio_brick_type) && _client_brick_type != NULL) { char *remote = strchr(new_name, '@'); if (remote) { remote++; MARS_DBG("substitute by remote brick '%s' on peer '%s'\n", new_name, remote); brick = mars_make_brick(global, belongs, _client_brick_type, new_path, new_name); if (brick) { struct client_brick *_brick = (void*)brick; _brick->max_flying = 10000; } } } if (!brick && new_brick_type == _bio_brick_type && _aio_brick_type) { struct kstat test = {}; int status = mars_stat(new_path, &test, false); if (SKIP_BIO || status < 0 || !S_ISBLK(test.mode)) { new_brick_type = _aio_brick_type; MARS_DBG("substitute bio by aio\n"); } } // create it... if (!brick) brick = mars_make_brick(global, belongs, new_brick_type, new_path, new_name); if (unlikely(!brick)) { MARS_ERR("creation failed '%s' '%s'\n", new_path, new_name); goto err; } if (unlikely(brick->nr_inputs < prev_count)) { MARS_ERR("wrong number of arguments: %d < %d\n", brick->nr_inputs, prev_count); goto err; } // connect the wires for (i = 0; i < prev_count; i++) { int status; status = generic_connect((void*)brick->inputs[i], (void*)prev[i]->outputs[0]); if (unlikely(status < 0)) { MARS_ERR("'%s' '%s' cannot connect input %d\n", new_path, new_name, i); goto err; } } // call setup function if (setup_fn) { setup_fn(brick, private); } do_switch: // switch on/off (may fail silently, but responsibility is at the workers) if (timeout > 0 || !switch_state) { int status; status = mars_power_button_recursive((void*)brick, switch_state, false, timeout); MARS_DBG("switch %d status = %d\n", switch_state, status); #if 0 // TODO: need cleanup_fn() here FIXME: interferes with logic needing the switched-off brick! if (!switch_state && status >= 0 && !brick->power.button && brick->power.led_off) { mars_kill_brick(brick); brick = NULL; } #endif } goto done; err: if (brick) { mars_kill_brick(brick); } brick = NULL; done: for (i = 0; i < prev_count; i++) { if (paths[i]) { kfree(paths[i]); } } if (_new_path) kfree(_new_path); if (switch_path) kfree(switch_path); return brick; } EXPORT_SYMBOL_GPL(make_brick_all); ///////////////////////////////////////////////////////////////////// // init stuff #define LIMIT_MEM #ifdef LIMIT_MEM #include #include #endif long long mars_global_memlimit = 0; EXPORT_SYMBOL_GPL(mars_global_memlimit); struct mm_struct *mm_fake = NULL; EXPORT_SYMBOL_GPL(mm_fake); static int __init init_mars(void) { MARS_INF("init_mars()\n"); #ifdef LIMIT_MEM // provisionary mars_global_memlimit = total_swapcache_pages * (PAGE_SIZE / 4); MARS_INF("mars_global_memlimit = %lld\n", mars_global_memlimit); #endif brick_obj_max = BRICK_OBJ_MAX; mars_tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); if (!mars_tfm) { MARS_ERR("cannot alloc crypto hash\n"); return -ENOMEM; } if (IS_ERR(mars_tfm)) { MARS_ERR("alloc crypto hash failed, status = %d\n", PTR_ERR(mars_tfm)); return PTR_ERR(mars_tfm); } #if 0 if (crypto_tfm_alg_type(crypto_hash_tfm(mars_tfm)) != CRYPTO_ALG_TYPE_DIGEST) { MARS_ERR("bad crypto hash type\n"); return -EINVAL; } #endif mars_digest_size = crypto_hash_digestsize(mars_tfm); MARS_INF("digest_size = %d\n", mars_digest_size); set_fake(); #ifdef MARS_TRACING { int flags = O_CREAT | O_TRUNC | O_RDWR | O_LARGEFILE; int prot = 0600; mm_segment_t oldfs; oldfs = get_fs(); set_fs(get_ds()); mars_log_file = filp_open("/mars/trace.csv", flags, prot); set_fs(oldfs); if (IS_ERR(mars_log_file)) { MARS_ERR("cannot create trace logfile, status = %ld\n", PTR_ERR(mars_log_file)); mars_log_file = NULL; } } #endif return 0; } static void __exit exit_mars(void) { MARS_INF("exit_mars()\n"); if (id) { kfree(id); id = NULL; } if (mars_tfm) { crypto_free_hash(mars_tfm); } put_fake(); #ifdef MARS_TRACING if (mars_log_file) { filp_close(mars_log_file, NULL); mars_log_file = NULL; } #endif } MODULE_DESCRIPTION("MARS block storage"); MODULE_AUTHOR("Thomas Schoebel-Theuer "); MODULE_LICENSE("GPL"); module_init(init_mars); module_exit(exit_mars);