/* * MARS Long Distance Replication Software * * This file is part of MARS project: http://schoebel.github.io/mars/ * * Copyright (C) 2010-2014 Thomas Schoebel-Theuer * Copyright (C) 2011-2014 1&1 Internet AG * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include #include #include #include "brick_say.h" #include "lamport.h" /*******************************************************************/ /* messaging */ #include #include #include #include #include #include #include #include #include #include #include #include "vfs_compat.h" #ifndef GFP_BRICK #define GFP_BRICK GFP_NOIO #endif #define SAY_ORDER 0 #define SAY_BUFMAX (PAGE_SIZE << SAY_ORDER) #define SAY_BUF_LIMIT (SAY_BUFMAX - 1500) #define MAX_FILELEN 16 #define MAX_IDS 1000 const char *say_class[MAX_SAY_CLASS] = { [SAY_DEBUG] = "debug", [SAY_INFO] = "info", [SAY_WARN] = "warn", [SAY_ERROR] = "error", [SAY_FATAL] = "fatal", [SAY_TOTAL] = "total", }; int brick_say_logging = 1; module_param_named(say_logging, brick_say_logging, int, 0); int brick_say_debug; module_param_named(say_debug, brick_say_debug, int, 0); int brick_say_syslog_min = 1; int brick_say_syslog_max = -1; int brick_say_syslog_flood_class = 3; int brick_say_syslog_flood_limit = 20; int brick_say_syslog_flood_recovery = 300; int delay_say_on_overflow = #ifdef CONFIG_MARS_DEBUG 1; #else 0; #endif static atomic_t say_alloc_channels = ATOMIC_INIT(0); static atomic_t say_alloc_names = ATOMIC_INIT(0); static atomic_t say_alloc_pages = ATOMIC_INIT(0); static unsigned long flood_start_jiffies; static int flood_count; struct say_channel { char *ch_name; struct say_channel *ch_next; spinlock_t ch_lock[MAX_SAY_CLASS]; char *ch_buf[MAX_SAY_CLASS][2]; short ch_index[MAX_SAY_CLASS]; struct file *ch_filp[MAX_SAY_CLASS][2]; int ch_overflow[MAX_SAY_CLASS]; bool ch_written[MAX_SAY_CLASS]; bool ch_rollover; bool ch_must_exist; bool ch_is_dir; bool ch_delete; int ch_status_written; int ch_id_max; void *ch_ids[MAX_IDS]; wait_queue_head_t ch_progress; }; struct say_channel *default_channel; static struct say_channel *channel_list; static rwlock_t say_lock = __RW_LOCK_UNLOCKED(say_lock); static struct task_struct *say_thread; static DECLARE_WAIT_QUEUE_HEAD(say_event); bool say_dirty; #define use_atomic() \ ((preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK | HARDIRQ_MASK | NMI_MASK)) != 0 || irqs_disabled()) static void wait_channel(struct say_channel *ch, int class) { if (delay_say_on_overflow && ch->ch_index[class] > SAY_BUF_LIMIT) { if (!use_atomic()) { say_dirty = true; wake_up_interruptible(&say_event); wait_event_interruptible_timeout(ch->ch_progress, ch->ch_index[class] < SAY_BUF_LIMIT, HZ / 10); } } } static struct say_channel *find_channel(const void *id) { struct say_channel *res = default_channel; struct say_channel *ch; read_lock(&say_lock); for (ch = channel_list; ch; ch = ch->ch_next) { int i; for (i = 0; i < ch->ch_id_max; i++) { if (ch->ch_ids[i] == id) { res = ch; goto found; } } } found: read_unlock(&say_lock); return res; } static void _remove_binding(struct task_struct *whom) { struct say_channel *ch; int i; for (ch = channel_list; ch; ch = ch->ch_next) { for (i = 0; i < ch->ch_id_max; i++) { if (ch->ch_ids[i] == whom) ch->ch_ids[i] = NULL; } } } void bind_to_channel(struct say_channel *ch, struct task_struct *whom) { int i; write_lock(&say_lock); _remove_binding(whom); for (i = 0; i < ch->ch_id_max; i++) { if (!ch->ch_ids[i]) { ch->ch_ids[i] = whom; goto done; } } if (likely(ch->ch_id_max < MAX_IDS - 1)) ch->ch_ids[ch->ch_id_max++] = whom; else goto err; done: write_unlock(&say_lock); goto out_return; err: write_unlock(&say_lock); say_to(default_channel, SAY_ERROR, "ID overflow for thread '%s'\n", whom->comm); out_return:; } struct say_channel *get_binding(struct task_struct *whom) { struct say_channel *ch; int i; read_lock(&say_lock); for (ch = channel_list; ch; ch = ch->ch_next) { for (i = 0; i < ch->ch_id_max; i++) { if (ch->ch_ids[i] == whom) goto found; } } ch = NULL; found: read_unlock(&say_lock); return ch; } void remove_binding_from(struct say_channel *ch, struct task_struct *whom) { bool found = false; int i; write_lock(&say_lock); for (i = 0; i < ch->ch_id_max; i++) { if (ch->ch_ids[i] == whom) { ch->ch_ids[i] = NULL; found = true; break; } } if (!found) _remove_binding(whom); write_unlock(&say_lock); } void remove_binding(struct task_struct *whom) { write_lock(&say_lock); _remove_binding(whom); write_unlock(&say_lock); } void rollover_channel(struct say_channel *ch) { if (!ch) ch = find_channel(current); if (likely(ch)) ch->ch_rollover = true; } void rollover_all(void) { struct say_channel *ch; read_lock(&say_lock); for (ch = channel_list; ch; ch = ch->ch_next) ch->ch_rollover = true; read_unlock(&say_lock); } void del_channel(struct say_channel *ch) { if (unlikely(!ch)) goto out_return; if (unlikely(ch == default_channel)) { say_to(default_channel, SAY_ERROR, "thread '%s' tried to delete the default channel\n", current->comm); goto out_return; } ch->ch_delete = true; out_return:; } static void _del_channel(struct say_channel *ch) { struct say_channel *tmp; struct say_channel **_tmp; int i, j; if (!ch) goto out_return; write_lock(&say_lock); for (_tmp = &channel_list; (tmp = *_tmp) != NULL; _tmp = &tmp->ch_next) { if (tmp == ch) { *_tmp = tmp->ch_next; break; } } write_unlock(&say_lock); for (i = 0; i < MAX_SAY_CLASS; i++) { for (j = 0; j < 2; j++) { if (ch->ch_filp[i][j]) { filp_close(ch->ch_filp[i][j], NULL); ch->ch_filp[i][j] = NULL; } } for (j = 0; j < 2; j++) { char *buf = ch->ch_buf[i][j]; if (buf) { __free_pages(virt_to_page((unsigned long)buf), SAY_ORDER); atomic_dec(&say_alloc_pages); } } } if (ch->ch_name) { atomic_dec(&say_alloc_names); kfree(ch->ch_name); } kfree(ch); atomic_dec(&say_alloc_channels); out_return:; } static struct say_channel *_make_channel(const char *name, bool must_exist) { struct say_channel *res = NULL; struct kstat kstat = {}; int i, j; unsigned long mode = use_atomic() ? GFP_ATOMIC : GFP_BRICK; mm_segment_t oldfs; bool is_dir = false; int status; oldfs = get_fs(); set_fs(get_ds()); status = vfs_stat((char *)name, &kstat); set_fs(oldfs); if (unlikely(status < 0)) { if (must_exist) { say(SAY_ERROR, "cannot create channel '%s', status = %d\n", name, status); goto done; } } else { is_dir = S_ISDIR(kstat.mode); } restart: res = kzalloc(sizeof(struct say_channel), mode); if (unlikely(!res)) { schedule(); goto restart; } atomic_inc(&say_alloc_channels); res->ch_must_exist = must_exist; res->ch_is_dir = is_dir; init_waitqueue_head(&res->ch_progress); restart2: res->ch_name = kstrdup(name, mode); if (unlikely(!res->ch_name)) { schedule(); goto restart2; } atomic_inc(&say_alloc_names); for (i = 0; i < MAX_SAY_CLASS; i++) { spin_lock_init(&res->ch_lock[i]); for (j = 0; j < 2; j++) { char *buf; restart3: buf = (void *)__get_free_pages(mode, SAY_ORDER); if (unlikely(!buf)) { schedule(); goto restart3; } atomic_inc(&say_alloc_pages); res->ch_buf[i][j] = buf; } } done: return res; } struct say_channel *make_channel(const char *name, bool must_exist) { struct say_channel *res = NULL; struct say_channel *ch; read_lock(&say_lock); for (ch = channel_list; ch; ch = ch->ch_next) { if (!strcmp(ch->ch_name, name)) { res = ch; break; } } read_unlock(&say_lock); if (unlikely(!res)) { res = _make_channel(name, must_exist); if (unlikely(!res)) goto done; write_lock(&say_lock); for (ch = channel_list; ch; ch = ch->ch_next) { if (ch != res && unlikely(!strcmp(ch->ch_name, name))) { _del_channel(res); res = ch; goto race_found; } } res->ch_next = channel_list; channel_list = res; race_found: write_unlock(&say_lock); } done: return res; } /* tell gcc to check for varargs errors */ static void _say(struct say_channel *ch, int class, va_list args, bool use_args, const char *fmt, ...) __printf(5, 6); static void _say(struct say_channel *ch, int class, va_list args, bool use_args, const char *fmt, ...) { char *start; int offset; int rest; int written; if (unlikely(!ch)) goto out_return; if (unlikely(ch->ch_delete && ch != default_channel)) { say_to(default_channel, SAY_ERROR, "thread '%s' tried to write on deleted channel\n", current->comm); goto out_return; } offset = ch->ch_index[class]; start = ch->ch_buf[class][0] + offset; rest = SAY_BUFMAX - 1 - offset; if (unlikely(rest <= 0)) { ch->ch_overflow[class]++; goto out_return; } if (use_args) { va_list args2; va_start(args2, fmt); written = vscnprintf(start, rest, fmt, args2); va_end(args2); } else { written = vscnprintf(start, rest, fmt, args); } if (likely(rest > written)) { start[written] = '\0'; ch->ch_index[class] += written; say_dirty = true; } else { /* indicate overflow */ start[0] = '\0'; ch->ch_overflow[class]++; } out_return:; } void say_to(struct say_channel *ch, int class, const char *fmt, ...) { va_list args; unsigned long flags; if (!class && !brick_say_debug) goto out_return; if (!ch) ch = find_channel(current); if (likely(ch)) { if (!ch->ch_is_dir) class = SAY_TOTAL; if (likely(class >= 0 && class < MAX_SAY_CLASS)) { wait_channel(ch, class); spin_lock_irqsave(&ch->ch_lock[class], flags); va_start(args, fmt); _say(ch, class, args, false, fmt); va_end(args); spin_unlock_irqrestore(&ch->ch_lock[class], flags); } } ch = default_channel; if (likely(ch)) { class = SAY_TOTAL; wait_channel(ch, class); spin_lock_irqsave(&ch->ch_lock[class], flags); va_start(args, fmt); _say(ch, class, args, false, fmt); va_end(args); spin_unlock_irqrestore(&ch->ch_lock[class], flags); wake_up_interruptible(&say_event); } out_return:; } void brick_say_to(struct say_channel *ch, int class, bool dump, const char *prefix, const char *file, int line, const char *func, const char *fmt, ...) { const char *channel_name = "-"; struct timespec s_now; struct timespec l_now; int filelen; int orig_class; va_list args; unsigned long flags; if (!class && !brick_say_debug) goto out_return; s_now = CURRENT_TIME; get_lamport(&l_now); if (!ch) ch = find_channel(current); orig_class = class; /* limit the filename */ filelen = strlen(file); if (filelen > MAX_FILELEN) file += filelen - MAX_FILELEN; if (likely(ch)) { channel_name = ch->ch_name; if (!ch->ch_is_dir) class = SAY_TOTAL; if (likely(class >= 0 && class < MAX_SAY_CLASS)) { wait_channel(ch, class); spin_lock_irqsave(&ch->ch_lock[class], flags); _say(ch, class, NULL, true, "%ld.%09ld %ld.%09ld %s %s[%d] %s:%d %s(): ", s_now.tv_sec, s_now.tv_nsec, l_now.tv_sec, l_now.tv_nsec, prefix, current->comm, (int)smp_processor_id(), file, line, func); va_start(args, fmt); _say(ch, class, args, false, fmt); va_end(args); spin_unlock_irqrestore(&ch->ch_lock[class], flags); } } ch = default_channel; if (likely(ch)) { wait_channel(ch, SAY_TOTAL); spin_lock_irqsave(&ch->ch_lock[SAY_TOTAL], flags); _say(ch, SAY_TOTAL, NULL, true, "%ld.%09ld %ld.%09ld %s_%-5s %s %s[%d] %s:%d %s(): ", s_now.tv_sec, s_now.tv_nsec, l_now.tv_sec, l_now.tv_nsec, prefix, say_class[orig_class], channel_name, current->comm, (int)smp_processor_id(), file, line, func); va_start(args, fmt); _say(ch, SAY_TOTAL, args, false, fmt); va_end(args); spin_unlock_irqrestore(&ch->ch_lock[SAY_TOTAL], flags); } #ifdef CONFIG_MARS_DEBUG if (dump) brick_dump_stack(); #endif wake_up_interruptible(&say_event); out_return:; } static void try_open_file(struct file **file, char *filename, bool creat) { struct address_space *mapping; int flags = O_APPEND | O_WRONLY | O_LARGEFILE; int prot = 0600; if (creat) flags |= O_CREAT; *file = filp_open(filename, flags, prot); if (unlikely(IS_ERR(*file))) { *file = NULL; goto out_return; } mapping = (*file)->f_mapping; if (likely(mapping)) mapping_set_gfp_mask(mapping, mapping_gfp_mask(mapping) & ~(__GFP_IO | __GFP_FS)); out_return:; } static void out_to_file(struct file *file, char *buf, int len) { loff_t log_pos = 0; mm_segment_t oldfs; if (file) { oldfs = get_fs(); set_fs(get_ds()); (void)vfs_write(file, buf, len, &log_pos); set_fs(oldfs); } } static inline void reset_flood(void) { if (flood_start_jiffies && time_is_before_jiffies(flood_start_jiffies + brick_say_syslog_flood_recovery * HZ)) { flood_start_jiffies = 0; flood_count = 0; } } static void printk_with_class(int class, char *buf) { switch (class) { case SAY_INFO: printk(KERN_INFO "%s", buf); break; case SAY_WARN: printk(KERN_WARNING "%s", buf); break; case SAY_ERROR: case SAY_FATAL: printk(KERN_ERR "%s", buf); break; default: printk(KERN_DEBUG "%s", buf); } } static void out_to_syslog(int class, char *buf, int len) { reset_flood(); if (class >= brick_say_syslog_min && class <= brick_say_syslog_max) { buf[len] = '\0'; printk_with_class(class, buf); } else if (class >= brick_say_syslog_flood_class && brick_say_syslog_flood_class >= 0 && class != SAY_TOTAL) { flood_start_jiffies = jiffies; if (++flood_count <= brick_say_syslog_flood_limit) { buf[len] = '\0'; printk_with_class(class, buf); } } } static inline char *_make_filename(struct say_channel *ch, int class, int transact, int add_tmp) { char *filename; restart: filename = kmalloc(1024, GFP_KERNEL); if (unlikely(!filename)) { schedule(); goto restart; } atomic_inc(&say_alloc_names); if (ch->ch_is_dir) { snprintf(filename, 1023, "%s/%d.%s.%s%s", ch->ch_name, class, say_class[class], transact ? "status" : "log", add_tmp ? ".tmp" : ""); } else { snprintf(filename, 1023, "%s.%s%s", ch->ch_name, transact ? "status" : "log", add_tmp ? ".tmp" : ""); } return filename; } static void _rollover_channel(struct say_channel *ch) { int start = 0; int class; ch->ch_rollover = false; ch->ch_status_written = 0; if (!ch->ch_is_dir) start = SAY_TOTAL; for (class = start; class < MAX_SAY_CLASS; class++) { char *old = _make_filename(ch, class, 1, 1); char *new = _make_filename(ch, class, 1, 0); if (likely(old && new)) { int i; mm_segment_t oldfs; for (i = 0; i < 2; i++) { if (ch->ch_filp[class][i]) { filp_close(ch->ch_filp[class][i], NULL); ch->ch_filp[class][i] = NULL; } } oldfs = get_fs(); set_fs(get_ds()); #ifdef __USE_COMPAT _compat_rename(old, new); #else sys_rename(old, new); #endif set_fs(oldfs); } if (likely(old)) { kfree(old); atomic_dec(&say_alloc_names); } if (likely(new)) { kfree(new); atomic_dec(&say_alloc_names); } } } static void treat_channel(struct say_channel *ch, int class) { int len; int overflow; int transact; int start; char *buf; char *tmp; unsigned long flags; spin_lock_irqsave(&ch->ch_lock[class], flags); buf = ch->ch_buf[class][0]; tmp = ch->ch_buf[class][1]; ch->ch_buf[class][1] = buf; ch->ch_buf[class][0] = tmp; len = ch->ch_index[class]; ch->ch_index[class] = 0; overflow = ch->ch_overflow[class]; ch->ch_overflow[class] = 0; spin_unlock_irqrestore(&ch->ch_lock[class], flags); wake_up_interruptible(&ch->ch_progress); ch->ch_status_written += len; out_to_syslog(class, buf, len); start = 0; if (!brick_say_logging) start++; for (transact = start; transact < 2; transact++) { if (unlikely(!ch->ch_filp[class][transact])) { char *filename = _make_filename(ch, class, transact, transact); if (likely(filename)) { try_open_file(&ch->ch_filp[class][transact], filename, transact); kfree(filename); atomic_dec(&say_alloc_names); } } out_to_file(ch->ch_filp[class][transact], buf, len); } if (unlikely(overflow > 0)) { struct timespec s_now = CURRENT_TIME; struct timespec l_now; get_lamport(&l_now); len = scnprintf(buf, SAY_BUFMAX, "%ld.%09ld %ld.%09ld %s %d OVERFLOW %d times\n", s_now.tv_sec, s_now.tv_nsec, l_now.tv_sec, l_now.tv_nsec, ch->ch_name, class, overflow); ch->ch_status_written += len; out_to_syslog(class, buf, len); for (transact = 0; transact < 2; transact++) out_to_file(ch->ch_filp[class][transact], buf, len); } } static int _say_thread(void *data) { while (!kthread_should_stop()) { struct say_channel *ch; int i; wait_event_interruptible_timeout(say_event, say_dirty, HZ); say_dirty = false; restart_rollover: read_lock(&say_lock); for (ch = channel_list; ch; ch = ch->ch_next) { if (ch->ch_rollover && ch->ch_status_written > 0) { read_unlock(&say_lock); _rollover_channel(ch); goto restart_rollover; } } read_unlock(&say_lock); restart: read_lock(&say_lock); for (ch = channel_list; ch; ch = ch->ch_next) { int start = 0; if (!ch->ch_is_dir) start = SAY_TOTAL; for (i = start; i < MAX_SAY_CLASS; i++) { if (ch->ch_index[i] > 0) { read_unlock(&say_lock); treat_channel(ch, i); goto restart; } } if (ch->ch_delete) { read_unlock(&say_lock); _del_channel(ch); goto restart; } } read_unlock(&say_lock); } return 0; } void init_say(void) { default_channel = make_channel(CONFIG_MARS_LOGDIR, true); say_thread = kthread_create(_say_thread, NULL, "brick_say"); if (IS_ERR(say_thread)) { say_thread = NULL; } else { get_task_struct(say_thread); wake_up_process(say_thread); } } void exit_say(void) { int memleak_channels; int memleak_names; int memleak_pages; if (say_thread) { kthread_stop(say_thread); put_task_struct(say_thread); say_thread = NULL; } default_channel = NULL; while (channel_list) _del_channel(channel_list); memleak_channels = atomic_read(&say_alloc_channels); memleak_names = atomic_read(&say_alloc_names); memleak_pages = atomic_read(&say_alloc_pages); if (unlikely(memleak_channels || memleak_names || memleak_pages)) printk("MEMLEAK: channels=%d names=%d pages=%d\n", memleak_channels, memleak_names, memleak_pages); } #ifdef CONFIG_MARS_DEBUG static int dump_max = 5; void brick_dump_stack(void) { if (dump_max > 0) { dump_max--; /* racy, but does no harm */ dump_stack(); } } #endif