infra: factor out mapfree infrastructure from aio

This commit is contained in:
Thomas Schoebel-Theuer 2013-01-23 12:21:22 +01:00
parent 806a62289c
commit cd551926c6
7 changed files with 341 additions and 268 deletions

View File

@ -11,6 +11,7 @@ mars-objs := \
lib_rank.o \
lib_limiter.o \
lib_timing.o \
lib_mapfree.o \
mars_net.o \
mars_server.o \
mars_client.o \

286
lib_mapfree.c Normal file
View File

@ -0,0 +1,286 @@
// (c) 2012 Thomas Schoebel-Theuer / 1&1 Internet AG
#include "lib_mapfree.h"
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/blkdev.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <linux/file.h>
/* Minimum age in seconds before the background thread processes an entry
 * again. Values <= 0 make the thread skip its scan altogether.
 */
int mapfree_period_sec = 10;
EXPORT_SYMBOL_GPL(mapfree_period_sec);
/* Guards mapfree_list: taken for reading during lookup and scanning,
 * for writing when inserting entries or dropping references.
 */
static
DECLARE_RWSEM(mapfree_mutex);
/* All shared (non-O_DIRECT) mapfree_info instances, linked via mf_head. */
static
LIST_HEAD(mapfree_list);
/* Drop cached pages of the file behind @mf.
 *
 * With @force the whole mapping (page 0 .. -1) is invalidated.
 * Otherwise the smaller of mf_min[0] / mf_min[1] is taken as the upper
 * bound, a non-zero mf_min[0] is rotated into mf_min[1], and the pages
 * between the previous bound (mf_last) and the new one are invalidated.
 * When both the bound and mf_last are zero the whole mapping is dropped.
 * Does nothing if the file or its mapping is missing.
 */
static
void mapfree_pages(struct mapfree_info *mf, bool force)
{
	struct address_space *mapping;
	/* default range: everything */
	pgoff_t first = 0;
	pgoff_t last = -1;

	if (unlikely(!mf->mf_filp))
		return;
	mapping = mf->mf_filp->f_mapping;
	if (unlikely(!mapping))
		return;

	if (!force) {
		unsigned long flags;
		loff_t reported;
		loff_t bound;

		traced_lock(&mf->mf_lock, flags);
		reported = mf->mf_min[0];
		bound = reported;
		if (likely(mf->mf_min[1] < bound))
			bound = mf->mf_min[1];
		if (reported) {
			/* age the current round's value and start a new round */
			mf->mf_min[1] = reported;
			mf->mf_min[0] = 0;
		}
		traced_unlock(&mf->mf_lock, flags);

		if (bound || mf->mf_last) {
			first = mf->mf_last / PAGE_SIZE;
			mf->mf_last = bound;
			last = bound / PAGE_SIZE;
		}
		/* else: there was no progress for at least 2 rounds, keep the full range */

		MARS_DBG("file = '%s' start = %lu end = %lu\n", SAFE_STR(mf->mf_name), first, last);
	}

	if (last >= first || last == -1)
		invalidate_mapping_pages(mapping, first, last);
}
/* Drop one reference on @mf without taking mapfree_mutex.
 *
 * When the count reaches zero the entry is unlinked from its list,
 * all cached pages are invalidated, the file is closed and both the
 * name string and the object itself are freed.
 */
static
void _mapfree_put(struct mapfree_info *mf)
{
	if (!atomic_dec_and_test(&mf->mf_count))
		return;

	MARS_DBG("closing file '%s' filp = %p\n", mf->mf_name, mf->mf_filp);
	list_del_init(&mf->mf_head);

	if (likely(mf->mf_filp)) {
		mapfree_pages(mf, true);
		filp_close(mf->mf_filp, NULL);
	}

	brick_string_free(mf->mf_name);
	brick_mem_free(mf);
}
/* Public counterpart of _mapfree_put(): drops one reference on @mf
 * while holding mapfree_mutex for writing, so that a final release
 * cannot overlap with list lookups or scans.
 */
void mapfree_put(struct mapfree_info *mf)
{
	down_write(&mapfree_mutex);

	_mapfree_put(mf);

	up_write(&mapfree_mutex);
}
EXPORT_SYMBOL_GPL(mapfree_put);
struct mapfree_info *mapfree_get(const char *name, int flags)
{
struct mapfree_info *mf = NULL;
struct list_head *tmp;
if (!(flags & O_DIRECT)) {
down_read(&mapfree_mutex);
for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head);
if (_mf->mf_flags == flags && !strcmp(_mf->mf_name, name)) {
mf = _mf;
atomic_inc(&mf->mf_count);
break;
}
}
up_read(&mapfree_mutex);
if (mf)
goto done;
}
for (;;) {
struct address_space *mapping;
struct inode *inode;
int ra = 1;
int prot = 0600;
mm_segment_t oldfs;
mf = brick_zmem_alloc(sizeof(struct mapfree_info));
if (unlikely(!mf)) {
MARS_ERR("no mem, name = '%s'\n", name);
continue;
}
mf->mf_name = brick_strdup(name);
if (unlikely(!mf->mf_name)) {
MARS_ERR("no mem, name = '%s'\n", name);
brick_mem_free(mf);
continue;
}
mf->mf_flags = flags;
INIT_LIST_HEAD(&mf->mf_head);
atomic_set(&mf->mf_count, 1);
spin_lock_init(&mf->mf_lock);
oldfs = get_fs();
set_fs(get_ds());
mf->mf_filp = filp_open(name, flags, prot);
set_fs(oldfs);
MARS_DBG("file '%s' flags = %d prot = %d filp = %p\n", name, flags, prot, mf->mf_filp);
if (unlikely(!mf->mf_filp || IS_ERR(mf->mf_filp))) {
int err = PTR_ERR(mf->mf_filp);
MARS_ERR("can't open file '%s' status=%d\n", name, err);
mf->mf_filp = NULL;
_mapfree_put(mf);
mf = NULL;
break;
}
if (unlikely(!(mapping = mf->mf_filp->f_mapping) ||
!(inode = mapping->host))) {
MARS_ERR("file '%s' has no mapping\n", name);
mf->mf_filp = NULL;
_mapfree_put(mf);
mf = NULL;
break;
}
mapping_set_gfp_mask(mapping, mapping_gfp_mask(mapping) & ~(__GFP_IO | __GFP_FS));
if (S_ISBLK(inode->i_mode)) {
MARS_INF("changing blkdev readahead from %lu to %d\n", inode->i_bdev->bd_disk->queue->backing_dev_info.ra_pages, ra);
inode->i_bdev->bd_disk->queue->backing_dev_info.ra_pages = ra;
}
if (flags & O_DIRECT) { // never share them
break;
}
// maintain global list of all open files
down_write(&mapfree_mutex);
for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head);
if (unlikely(_mf->mf_flags == flags && !strcmp(_mf->mf_name, name))) {
MARS_WRN("race on creation of '%s' detected\n", name);
_mapfree_put(mf);
mf = _mf;
goto leave;
}
}
list_add_tail(&mf->mf_head, &mapfree_list);
leave:
up_write(&mapfree_mutex);
break;
}
done:
return mf;
}
EXPORT_SYMBOL_GPL(mapfree_get);
/* Record offset @min for the current round of @mf.
 *
 * The stored value mf_min[0] is replaced when it is still unset (zero)
 * or larger than @min, so the lowest offset reported per round is kept.
 * The update happens under mf_lock.
 */
void mapfree_set(struct mapfree_info *mf, loff_t min)
{
	loff_t *slot = &mf->mf_min[0];
	unsigned long flags;

	traced_lock(&mf->mf_lock, flags);
	if (*slot == 0 || min < *slot)
		*slot = min;
	traced_unlock(&mf->mf_lock, flags);
}
EXPORT_SYMBOL_GPL(mapfree_set);
static
int mapfree_thread(void *data)
{
while (!brick_thread_should_stop()) {
struct mapfree_info *mf = NULL;
struct list_head *tmp;
long long eldest = 0;
brick_msleep(500);
if (mapfree_period_sec <= 0)
continue;
down_read(&mapfree_mutex);
for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head);
if (unlikely(!_mf->mf_jiffies)) {
_mf->mf_jiffies = jiffies;
continue;
}
if ((long long)jiffies - _mf->mf_jiffies > mapfree_period_sec * HZ &&
(!mf || _mf->mf_jiffies < eldest)) {
mf = _mf;
eldest = _mf->mf_jiffies;
}
}
if (mf)
atomic_inc(&mf->mf_count);
up_read(&mapfree_mutex);
if (!mf) {
continue;
}
mapfree_pages(mf, false);
mf->mf_jiffies = jiffies;
mapfree_put(mf);
}
return 0;
}
////////////////// module init stuff /////////////////////////
/* Handle of the background mapfree thread; NULL while it is not running. */
static
struct task_struct *mf_thread = NULL;
/* Start the background thread "mars_mapfree".
 * Returns 0 on success, -ENOMEM when the thread could not be created.
 */
int __init init_mars_mapfree(void)
{
	MARS_DBG("init_mapfree()\n");

	mf_thread = brick_thread_create(mapfree_thread, NULL, "mars_mapfree");
	if (likely(mf_thread))
		return 0;

	MARS_ERR("could not create mapfree thread\n");
	return -ENOMEM;
}
/* Stop the background thread, if it was started, and forget its handle. */
void __exit exit_mars_mapfree(void)
{
	MARS_DBG("exit_mapfree()\n");

	if (unlikely(!mf_thread))
		return;

	brick_thread_stop(mf_thread);
	mf_thread = NULL;
}
/* Standalone module boilerplate; compiled out when CONFIG_MARS_HAVE_BIGMODULE
 * is defined, in which case init/exit must be invoked by other code.
 */
#ifndef CONFIG_MARS_HAVE_BIGMODULE
MODULE_DESCRIPTION("MARS mapfree infrastructure");
MODULE_AUTHOR("Thomas Schoebel-Theuer <tst@1und1.de>");
MODULE_LICENSE("GPL");
module_init(init_mars_mapfree);
module_exit(exit_mars_mapfree);
#endif

49
lib_mapfree.h Normal file
View File

@ -0,0 +1,49 @@
// (c) 2012 Thomas Schoebel-Theuer / 1&1 Internet AG
#ifndef MARS_LIB_MAPFREE_H
#define MARS_LIB_MAPFREE_H
/* Mapfree infrastructure.
*
* Purposes:
*
* 1) Open files only once when possible, do ref-counting on struct mapfree_info
*
* 2) Automatically call invalidate_mapping_pages() in the background on
* "unused" areas to free resources.
* Used areas can be indicated by calling mapfree_set() frequently.
* Usage model: tailored to sequential logfiles.
*
* 3) Do it all in a completely decoupled manner, in order to prevent resource deadlocks.
*
* 4) Also to prevent deadlocks: always set mapping_set_gfp_mask() accordingly.
*/
#include "mars.h"
/* Minimum age in seconds before an open file is processed again by the
 * background thread; values <= 0 disable the periodic processing.
 */
extern int mapfree_period_sec;
/* Per-file state: one reference-counted instance per open file. */
struct mapfree_info {
struct list_head mf_head; // link in the global list of shared files
char *mf_name; // private copy of the file name
struct file *mf_filp; // the open file, or NULL
int mf_flags; // open flags; also part of the lookup key
atomic_t mf_count; // reference count, the last put closes the file
spinlock_t mf_lock; // protects mf_min[]
loff_t mf_min[2]; // [0]: lowest offset from mapfree_set() in the current round, [1]: value of the previous round
loff_t mf_last; // upper bound used by the previous invalidation
long long mf_jiffies; // jiffies value of the last background processing, 0 = not yet seen
};
/* Return a referenced mapfree_info for @filename opened with @flags.
 * Non-O_DIRECT files with identical name and flags share one instance.
 * Returns NULL when the file cannot be opened.
 * Balance every successful call with mapfree_put().
 */
struct mapfree_info *mapfree_get(const char *filename, int flags);
/* Drop one reference; the last one invalidates the cached pages,
 * closes the file and frees @mf.
 */
void mapfree_put(struct mapfree_info *mf);
/* Report offset @min; the lowest offset reported per round bounds the
 * area whose cached pages the background thread will invalidate.
 */
void mapfree_set(struct mapfree_info *mf, loff_t min);
////////////////// module init stuff /////////////////////////
/* Start / stop the background thread doing the invalidation. */
int __init init_mars_mapfree(void);
void __exit exit_mars_mapfree(void);
#endif

View File

@ -16,6 +16,7 @@
#include "mars.h"
#include "lib_timing.h"
#include "lib_mapfree.h"
#include "mars_aio.h"
@ -126,260 +127,6 @@ done:
return mref_a;
}
////////////////// mapfree_pages() infrastructure //////////////////
/* Minimum age in seconds before the mapfree thread processes an entry
 * again; values <= 0 make the thread skip its scan.
 */
int mapfree_period_sec = 10;
EXPORT_SYMBOL_GPL(mapfree_period_sec);
/* Reader/writer semaphore guarding mapfree_list. */
static
DECLARE_RWSEM(mapfree_mutex);
/* All shared (non-O_DIRECT) mapfree_info instances. */
static
LIST_HEAD(mapfree_list);
/* Per-file state: one reference-counted instance per open file. */
struct mapfree_info {
struct list_head mf_head; // link in mapfree_list
char *mf_name; // private copy of the file name
struct file *mf_filp; // the open file, or NULL
int mf_flags; // open flags; also part of the lookup key
atomic_t mf_count; // reference count
spinlock_t mf_lock; // protects mf_min[]
loff_t mf_min[2]; // [0]: lowest offset of the current round, [1]: value of the previous round
loff_t mf_last; // upper bound used by the previous invalidation
long long mf_jiffies; // jiffies value of the last processing, 0 = not yet seen
};
/* Drop cached pages of the file behind @mf.
 *
 * With @force the whole mapping (page 0 .. -1) is invalidated.
 * Otherwise the smaller of mf_min[0] / mf_min[1] becomes the upper bound,
 * a non-zero mf_min[0] is rotated into mf_min[1], and the pages between
 * the previous bound (mf_last) and the new one are invalidated.
 * Does nothing if the file or its mapping is missing.
 */
static
void mapfree_pages(struct mapfree_info *mf, bool force)
{
struct address_space *mapping;
pgoff_t start;
pgoff_t end;
if (unlikely(!mf->mf_filp || !(mapping = mf->mf_filp->f_mapping)))
goto done;
if (force) {
start = 0;
end = -1;
} else {
unsigned long flags;
loff_t tmp;
loff_t min;
traced_lock(&mf->mf_lock, flags);
// min := smaller one of the current and the previous round's value
min = tmp = mf->mf_min[0];
if (likely(mf->mf_min[1] < min))
min = mf->mf_min[1];
// age the current value and start a new round
if (tmp) {
mf->mf_min[1] = tmp;
mf->mf_min[0] = 0;
}
traced_unlock(&mf->mf_lock, flags);
if (min || mf->mf_last) {
start = mf->mf_last / PAGE_SIZE;
mf->mf_last = min;
end = min / PAGE_SIZE;
} else { // there was no progress for at least 2 rounds
start = 0;
end = -1;
}
MARS_DBG("file = '%s' start = %lu end = %lu\n", SAFE_STR(mf->mf_name), start, end);
}
// skip empty / reversed ranges; end == -1 stands for "up to the last page"
if (end >= start || end == -1) {
invalidate_mapping_pages(mapping, start, end);
}
done:;
}
/* Drop one reference on @mf without taking mapfree_mutex.
 * On the last reference: unlink, invalidate all cached pages, close the
 * file and free the name string as well as the object.
 */
static
void _mapfree_put(struct mapfree_info *mf)
{
if (atomic_dec_and_test(&mf->mf_count)) {
MARS_DBG("closing file '%s' filp = %p\n", mf->mf_name, mf->mf_filp);
list_del_init(&mf->mf_head);
if (likely(mf->mf_filp)) {
mapfree_pages(mf, true);
filp_close(mf->mf_filp, NULL);
}
brick_string_free(mf->mf_name);
brick_mem_free(mf);
}
}
/* Drop one reference on @mf while holding mapfree_mutex for writing. */
static
void mapfree_put(struct mapfree_info *mf)
{
down_write(&mapfree_mutex);
_mapfree_put(mf);
up_write(&mapfree_mutex);
}
/* Obtain a referenced mapfree_info for file @name opened with @flags.
 *
 * Unless O_DIRECT is requested, an already listed entry with identical
 * name and flags is reused. Otherwise the file is opened, __GFP_IO and
 * __GFP_FS are removed from its mapping's gfp mask, readahead of block
 * devices is set to 1 page, and (for non-O_DIRECT) the new entry is
 * appended to mapfree_list.
 *
 * Returns NULL when the file cannot be opened or has no mapping/inode.
 */
static
struct mapfree_info *mapfree_get(const char *name, int flags)
{
struct mapfree_info *mf = NULL;
struct list_head *tmp;
if (!(flags & O_DIRECT)) {
// fast path: reuse an existing entry and take a reference on it
down_read(&mapfree_mutex);
for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head);
if (_mf->mf_flags == flags && !strcmp(_mf->mf_name, name)) {
mf = _mf;
atomic_inc(&mf->mf_count);
break;
}
}
up_read(&mapfree_mutex);
if (mf)
goto done;
}
// the loop only repeats on allocation failure
for (;;) {
struct address_space *mapping;
struct inode *inode;
int ra = 1;
int prot = 0600;
mm_segment_t oldfs;
mf = brick_zmem_alloc(sizeof(struct mapfree_info));
if (unlikely(!mf)) {
MARS_ERR("no mem, name = '%s'\n", name);
continue;
}
mf->mf_name = brick_strdup(name);
if (unlikely(!mf->mf_name)) {
MARS_ERR("no mem, name = '%s'\n", name);
brick_mem_free(mf);
continue;
}
mf->mf_flags = flags;
INIT_LIST_HEAD(&mf->mf_head);
atomic_set(&mf->mf_count, 1);
spin_lock_init(&mf->mf_lock);
// @name is a kernel-space string, so widen the address limit for the open
oldfs = get_fs();
set_fs(get_ds());
mf->mf_filp = filp_open(name, flags, prot);
set_fs(oldfs);
MARS_DBG("file '%s' flags = %d prot = %d filp = %p\n", name, flags, prot, mf->mf_filp);
if (unlikely(!mf->mf_filp || IS_ERR(mf->mf_filp))) {
int err = PTR_ERR(mf->mf_filp);
MARS_ERR("can't open file '%s' status=%d\n", name, err);
mf->mf_filp = NULL;
_mapfree_put(mf);
mf = NULL;
break;
}
if (unlikely(!(mapping = mf->mf_filp->f_mapping) ||
!(inode = mapping->host))) {
MARS_ERR("file '%s' has no mapping\n", name);
// NOTE(review): the successfully opened file is forgotten here without filp_close()
mf->mf_filp = NULL;
_mapfree_put(mf);
mf = NULL;
break;
}
mapping_set_gfp_mask(mapping, mapping_gfp_mask(mapping) & ~(__GFP_IO | __GFP_FS));
if (S_ISBLK(inode->i_mode)) {
MARS_INF("changing blkdev readahead from %lu to %d\n", inode->i_bdev->bd_disk->queue->backing_dev_info.ra_pages, ra);
inode->i_bdev->bd_disk->queue->backing_dev_info.ra_pages = ra;
}
if (flags & O_DIRECT) { // never share them
break;
}
// maintain global list of all open files
down_write(&mapfree_mutex);
// re-check under the write lock: somebody may have inserted the same file meanwhile
for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head);
if (unlikely(_mf->mf_flags == flags && !strcmp(_mf->mf_name, name))) {
MARS_WRN("race on creation of '%s' detected\n", name);
_mapfree_put(mf);
// NOTE(review): _mf is handed out without incrementing _mf->mf_count
mf = _mf;
goto leave;
}
}
list_add_tail(&mf->mf_head, &mapfree_list);
leave:
up_write(&mapfree_mutex);
break;
}
done:
return mf;
}
/* Record offset @min for the current round: mf_min[0] is replaced when it
 * is still unset (zero) or larger than @min. Done under mf_lock.
 */
static
void mapfree_set(struct mapfree_info *mf, loff_t min)
{
unsigned long flags;
traced_lock(&mf->mf_lock, flags);
if (!mf->mf_min[0] || mf->mf_min[0] > min)
mf->mf_min[0] = min;
traced_unlock(&mf->mf_lock, flags);
}
/* Background thread body: every 500 ms pick the listed entry with the
 * oldest mf_jiffies that is older than mapfree_period_sec seconds,
 * invalidate its unused pages and refresh its timestamp.
 * @data is unused. Always returns 0.
 */
static
int mapfree_thread(void *data)
{
while (!brick_thread_should_stop()) {
struct mapfree_info *mf = NULL;
struct list_head *tmp;
long long eldest = 0;
brick_msleep(500);
if (mapfree_period_sec <= 0)
continue;
down_read(&mapfree_mutex);
for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head);
// first sighting: just start the clock
if (unlikely(!_mf->mf_jiffies)) {
_mf->mf_jiffies = jiffies;
continue;
}
// NOTE(review): the product on the right-hand side is computed in int
if ((long long)jiffies - _mf->mf_jiffies > mapfree_period_sec * HZ &&
(!mf || _mf->mf_jiffies < eldest)) {
mf = _mf;
eldest = _mf->mf_jiffies;
}
}
// pin the candidate before the list lock is released
if (mf)
atomic_inc(&mf->mf_count);
up_read(&mapfree_mutex);
if (!mf) {
continue;
}
mapfree_pages(mf, false);
mf->mf_jiffies = jiffies;
mapfree_put(mf);
}
return 0;
}
////////////////// own brick / input / output operations //////////////////
static int aio_ref_get(struct aio_output *output, struct mref_object *mref)
@ -1392,17 +1139,9 @@ EXPORT_SYMBOL_GPL(aio_brick_type);
////////////////// module init stuff /////////////////////////
/* Handle of the background mapfree thread; NULL while it is not running. */
static
struct task_struct *mf_thread = NULL;
/* Start the mapfree thread, then publish and register the aio brick type.
 * Returns -ENOMEM when the thread cannot be created, otherwise the result
 * of aio_register_brick_type().
 */
int __init init_mars_aio(void)
{
MARS_INF("init_aio()\n");
mf_thread = brick_thread_create(mapfree_thread, NULL, "mars_mapfree");
if (unlikely(!mf_thread)) {
MARS_ERR("could not create mapfree thread\n");
return -ENOMEM;
}
_aio_brick_type = (void*)&aio_brick_type;
return aio_register_brick_type();
}
@ -1410,10 +1149,6 @@ int __init init_mars_aio(void)
/* Stop the mapfree thread (if running), then unregister the aio brick type. */
void __exit exit_mars_aio(void)
{
MARS_INF("exit_aio()\n");
if (likely(mf_thread)) {
brick_thread_stop(mf_thread);
mf_thread = NULL;
}
aio_unregister_brick_type();
}

View File

@ -14,8 +14,6 @@ extern struct threshold aio_submit_threshold;
extern struct threshold aio_io_threshold[2];
extern struct threshold aio_sync_threshold;
extern int mapfree_period_sec;
//#define USE_CLEVER_SYNC // TODO: NYI (should result in better write performance)
#ifdef USE_CLEVER_SYNC

View File

@ -28,6 +28,8 @@
#include <linux/wait.h>
#include "../lib_mapfree.h"
// used brick types
#include "../mars_server.h"
#include "../mars_client.h"
@ -4153,6 +4155,7 @@ static int __init init_light(void)
DO_INIT(brick_mem);
DO_INIT(brick);
DO_INIT(mars);
DO_INIT(mars_mapfree);
#ifdef CONFIG_MARS_DEBUG // otherwise currently unused
DO_INIT(mars_dummy);
DO_INIT(mars_check);

View File

@ -13,6 +13,7 @@
#include "strategy.h"
#include "mars_proc.h"
#include "../lib_mapfree.h"
#include "../mars_bio.h"
#include "../mars_aio.h"
#include "../mars_copy.h"