mirror of https://github.com/schoebel/mars
infra: new Lamport clock implementation
This commit is contained in:
parent
4f071e362f
commit
104b3a522a
165
kernel/lamport.c
165
kernel/lamport.c
|
@ -3,8 +3,8 @@
|
||||||
*
|
*
|
||||||
* This file is part of MARS project: http://schoebel.github.io/mars/
|
* This file is part of MARS project: http://schoebel.github.io/mars/
|
||||||
*
|
*
|
||||||
* Copyright (C) 2010-2014 Thomas Schoebel-Theuer
|
* Copyright (C) 2010-2017 Thomas Schoebel-Theuer
|
||||||
* Copyright (C) 2011-2014 1&1 Internet AG
|
* Copyright (C) 2011-2017 1&1 Internet AG
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
@ -24,50 +24,141 @@
|
||||||
|
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/semaphore.h>
|
#include <linux/rwsem.h>
|
||||||
|
|
||||||
#include "lamport.h"
|
#include "lamport.h"
|
||||||
|
|
||||||
|
/* This implementation is a variant of the following:
|
||||||
struct semaphore lamport_sem = __SEMAPHORE_INITIALIZER(lamport_sem, 1); // TODO: replace with spinlock if possible (first check)
|
*
|
||||||
struct timespec lamport_now = {};
|
@article{Lamport78,
|
||||||
|
author = {Leslie Lamport},
|
||||||
void get_lamport(struct timespec *real_now, struct timespec *lamp_now)
|
title = {Time, Clocks, and the Ordering of Events in a Distributed System},
|
||||||
{
|
journal = {CACM},
|
||||||
int diff;
|
volume = {21},
|
||||||
|
number = {7},
|
||||||
down(&lamport_sem);
|
year = {1978},
|
||||||
|
pages = {558--565}
|
||||||
*lamp_now = CURRENT_TIME;
|
|
||||||
if (real_now)
|
|
||||||
*real_now = *lamp_now;
|
|
||||||
diff = timespec_compare(lamp_now, &lamport_now);
|
|
||||||
if (diff >= 0) {
|
|
||||||
timespec_add_ns(lamp_now, 1);
|
|
||||||
memcpy(&lamport_now, lamp_now, sizeof(lamport_now));
|
|
||||||
timespec_add_ns(&lamport_now, 1);
|
|
||||||
} else {
|
|
||||||
timespec_add_ns(&lamport_now, 1);
|
|
||||||
memcpy(lamp_now, &lamport_now, sizeof(*lamp_now));
|
|
||||||
}
|
}
|
||||||
|
* We always get both the local real time and the Lamport time in parallel.
|
||||||
|
* The Lamport timestamp cannot fall behind the real timestamp, but
|
||||||
|
* it may go ahead (into the "future") when clocks in the distributed
|
||||||
|
* system are not synchronized precisely enough (e.g. via ntp).
|
||||||
|
*
|
||||||
|
* Thus we have a physical Lamport clock with the additional property
|
||||||
|
* that it cannot fall behind local realtime.
|
||||||
|
*/
|
||||||
|
|
||||||
up(&lamport_sem);
|
/* TODO CHECK: would a different locking method be better?
|
||||||
|
* rwlocks? RCU?
|
||||||
|
*
|
||||||
|
* I did not really check it, due to lack of time.
|
||||||
|
*
|
||||||
|
* The reason why I chose rw_semaphore (against some contemporary
|
||||||
|
* "common belief") is the following:
|
||||||
|
*
|
||||||
|
* A Lamport clock is a _global_ object by definition (with respect
|
||||||
|
* to an SMP system => attention we have two levels of parallelism:
|
||||||
|
* one at the Distributed System level, and SMP at the node level).
|
||||||
|
*
|
||||||
|
* Thus it _can_ happen that the Lamport clock forms a bottleneck,
|
||||||
|
* e.g. when O(n) MARS resources are syncing in parallel over a fast
|
||||||
|
* network.
|
||||||
|
*
|
||||||
|
* Looking only at the "best case" where spinlocks or RCU might be faster
|
||||||
|
* is therefore fundamentally broken. Instead, not only the
|
||||||
|
* average case has to be observed, but also the worst case.
|
||||||
|
*
|
||||||
|
* We have some 40-core SMP/NUMA machines now (2017) at 1&1, and the number
|
||||||
|
* will likely increase to 72 cores this year.
|
||||||
|
* I know of cases where spinlock contention is really happening on
|
||||||
|
* such machines in practice. If it happens, it almost kills the machine.
|
||||||
|
*
|
||||||
|
* When O(n) processors are spinning for the same bottleneck only _once_
|
||||||
|
* each, already O(n^2) CPU cycles are burnt. When the bottleneck is
|
||||||
|
* a _continuous_ one (e.g. multiple long-lasting MARS syncs in parallel),
|
||||||
|
* then the whole machine may lose its efficiency and burn more than 90%
|
||||||
|
* of its total CPU power in spinlocks.
|
||||||
|
*
|
||||||
|
* Thus I think some kind of scheduling lock is needed because the worst
|
||||||
|
* case is an important one when the number of processors is high.
|
||||||
|
*
|
||||||
|
* Don't test this on workstations or notebooks, please test it on
|
||||||
|
* the _most_ _powerful_ _servers_ you can get.
|
||||||
|
*
|
||||||
|
* THINK: is performance really the right measure in the long-term future?
|
||||||
|
*
|
||||||
|
* I think we should consider the _power_ _consumption_ (nJ / LamportOperation)
|
||||||
|
* as a candidate for a more important measure in future.
|
||||||
|
*
|
||||||
|
* Please improve this code, but please use the right optimisation goal.
|
||||||
|
*/
|
||||||
|
struct rw_semaphore lamport_sem = __RWSEM_INITIALIZER(lamport_sem);
|
||||||
|
|
||||||
|
struct timespec lamport_stamp = {};
|
||||||
|
|
||||||
|
void get_lamport(struct timespec *real_now, struct timespec *lamport_now)
|
||||||
|
{
|
||||||
|
struct timespec _real_now;
|
||||||
|
struct timespec _lamport_now;
|
||||||
|
|
||||||
|
/* Get a consistent copy of _both_ clocks */
|
||||||
|
down_read(&lamport_sem);
|
||||||
|
_lamport_now = lamport_stamp;
|
||||||
|
/* Theoretically, the next statement could be moved behind the unlock.
|
||||||
|
* However, then we will lose strictness of real timestamps,
|
||||||
|
* or even may produce contradictory orderings between real and
|
||||||
|
* Lamport timestamps, respectively, in relation to pseudo-parallel
|
||||||
|
* calls to get_lamport().
|
||||||
|
*/
|
||||||
|
_real_now = CURRENT_TIME;
|
||||||
|
up_read(&lamport_sem);
|
||||||
|
|
||||||
|
if (real_now)
|
||||||
|
*real_now = _real_now;
|
||||||
|
/* use the maximum of both clocks as Lamport timestamp */
|
||||||
|
if (timespec_compare(&_real_now, &_lamport_now) >= 0)
|
||||||
|
*lamport_now = _real_now;
|
||||||
|
else
|
||||||
|
*lamport_now = _lamport_now;
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPORT_SYMBOL_GPL(get_lamport);
|
EXPORT_SYMBOL_GPL(get_lamport);
|
||||||
|
|
||||||
void set_lamport(struct timespec *old)
|
void set_lamport(struct timespec *lamport_old)
|
||||||
{
|
{
|
||||||
int diff;
|
/* Always advance the internal Lamport timestamp a little bit
|
||||||
|
* in order to ensure strict monotonicity between set_lamport() calls.
|
||||||
down(&lamport_sem);
|
*/
|
||||||
|
down_write(&lamport_sem);
|
||||||
diff = timespec_compare(old, &lamport_now);
|
if (timespec_compare(lamport_old, &lamport_stamp) > 0)
|
||||||
if (diff >= 0) {
|
lamport_stamp = *lamport_old;
|
||||||
memcpy(&lamport_now, old, sizeof(lamport_now));
|
else
|
||||||
timespec_add_ns(&lamport_now, 1);
|
timespec_add_ns(&lamport_stamp, 1);
|
||||||
}
|
up_write(&lamport_sem);
|
||||||
|
|
||||||
up(&lamport_sem);
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(set_lamport);
|
EXPORT_SYMBOL_GPL(set_lamport);
|
||||||
|
|
||||||
|
/* After advancing the Lamport time, re-get the new values.
|
||||||
|
* This is almost equivalent to a sequence of set_lamport() ; get_lamport()
|
||||||
|
* but more efficient because the lock is taken only once.
|
||||||
|
*/
|
||||||
|
void set_get_lamport(struct timespec *lamport_old, struct timespec *real_now, struct timespec *lamport_now)
|
||||||
|
{
|
||||||
|
struct timespec _real_now;
|
||||||
|
|
||||||
|
down_write(&lamport_sem);
|
||||||
|
if (timespec_compare(lamport_old, &lamport_stamp) > 0)
|
||||||
|
*lamport_now = *lamport_old;
|
||||||
|
else
|
||||||
|
*lamport_now = timespec_add(lamport_stamp, (struct timespec){0, 1});
|
||||||
|
lamport_stamp = *lamport_now;
|
||||||
|
_real_now = CURRENT_TIME;
|
||||||
|
up_write(&lamport_sem);
|
||||||
|
|
||||||
|
if (real_now)
|
||||||
|
*real_now = _real_now;
|
||||||
|
/* use the maximum of both clocks as Lamport timestamp */
|
||||||
|
if (timespec_compare(&_real_now, lamport_now) > 0)
|
||||||
|
*lamport_now = _real_now;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(set_get_lamport);
|
||||||
|
|
|
@ -30,9 +30,13 @@
|
||||||
* We always get both the local real time and the Lamport time in parallel,
|
* We always get both the local real time and the Lamport time in parallel,
|
||||||
* consistently.
|
* consistently.
|
||||||
*
|
*
|
||||||
|
* The implementation ensures that the distributed Lamport timestamp can
|
||||||
|
* never fall behind the local real time.
|
||||||
|
*
|
||||||
* When not interested in real time, you can simply leave real_now at NULL.
|
* When not interested in real time, you can simply leave real_now at NULL.
|
||||||
*/
|
*/
|
||||||
extern void get_lamport(struct timespec *real_now, struct timespec *lamport_now);
|
extern void get_lamport(struct timespec *real_now, struct timespec *lamport_now);
|
||||||
extern void set_lamport(struct timespec *lamport_old);
|
extern void set_lamport(struct timespec *lamport_old);
|
||||||
|
extern void set_get_lamport(struct timespec *lamport_old, struct timespec *real_now, struct timespec *lamport_now);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue