mirror of https://github.com/schoebel/mars
Merge branch 'mars0.1.y' into mars0.1a.y
This commit is contained in:
commit
abf5b84b6d
76
ChangeLog
76
ChangeLog
|
@ -30,7 +30,7 @@ Example: mars0.3alpha*:
|
||||||
|
|
||||||
Release Conventions / Branches / Tagnames
|
Release Conventions / Branches / Tagnames
|
||||||
-----------------------------------------
|
-----------------------------------------
|
||||||
FLOW OF BUGFIXES: 0.1 -> 0.1a -> 0.1b -> 0.2 -> ...
|
FLOW OF BUGFIXES: 0.1 -> 0.1a -> ...
|
||||||
|
|
||||||
mars0.1 series (stable, will go EOL soon):
|
mars0.1 series (stable, will go EOL soon):
|
||||||
- Will run in parallel to branch 0.1a for a few
|
- Will run in parallel to branch 0.1a for a few
|
||||||
|
@ -48,64 +48,6 @@ Release Conventions / Branches / Tagnames
|
||||||
- Stable branch: mars0.1a.y
|
- Stable branch: mars0.1a.y
|
||||||
- Stable tagnames: mars0.1astable%02d
|
- Stable tagnames: mars0.1astable%02d
|
||||||
|
|
||||||
mars0.1b series (currently alpha):
|
|
||||||
This is an _intermediate_ series between 0.1 and 0.2.
|
|
||||||
The goal is to improve _scalability_ to thousands of
|
|
||||||
hosts in one cluster, as well as thousands of resources.
|
|
||||||
Likely, this intermediate branch will be merged into 0.2
|
|
||||||
and then continue development there. When this point
|
|
||||||
will arrive is uncertain at the moment.
|
|
||||||
Likely, the stabilization of the new scalability features
|
|
||||||
will occur together with the 0.2 series.
|
|
||||||
Reason for this: the rollout strategy at 1&1 to
|
|
||||||
thousands of machines wants to do small incremental
|
|
||||||
steps. The risk of directly going to 0.2 in _masses_
|
|
||||||
is minimized by first rolling out the really necessary
|
|
||||||
changes, and to postpone those developments which are
|
|
||||||
currently not yet really needed in mass deployment.
|
|
||||||
|
|
||||||
mars0.2 series (currently in beta stage):
|
|
||||||
Mostly for internal needs of 1&1 (but not limited to that).
|
|
||||||
- Getting rid of the kernel prepatch! MARS may be built
|
|
||||||
as an external kernel module for any supported
|
|
||||||
kernel version. First prototype is only tested for
|
|
||||||
unaltered 3.2.x vanilla kernel, but compatibility to
|
|
||||||
further vanilla kernel versions (maybe even
|
|
||||||
Redhat-specific ones) will follow during the course of
|
|
||||||
the MARS mars0.2 stable series. The problem is not
|
|
||||||
compatibility as such, but _testing_ that it really
|
|
||||||
works. These tests need a lot of time.
|
|
||||||
=> further arguments for getting to kernel upstream ASAP.
|
|
||||||
- Improved network throughput by parallel TCP connections
|
|
||||||
(in particular under packet loss).
|
|
||||||
Also called "socket bundling".
|
|
||||||
First benchmarks show an impressive speedup over
|
|
||||||
highly congested long-distance lines.
|
|
||||||
- Future-proof updates in the network protocol:
|
|
||||||
Mixed operation of 32/64bit and/or {big,little}endian
|
|
||||||
- Support for multi-homed network interfaces.
|
|
||||||
- Transparent data compression over low bandwidth lines.
|
|
||||||
Consumes a lot of CPU, therefore only recommended for
|
|
||||||
low write loads or for desperate network situations.
|
|
||||||
- Remote device: bypassing iSCSI. In essence,
|
|
||||||
/dev/mars/mydata can appear at any other cluster member
|
|
||||||
which doesn't necessarily need any local disks.
|
|
||||||
- Various smaller features and improvements.
|
|
||||||
- Unstable tagnames: mars0.2beta%d.%d (current)
|
|
||||||
- Stable branch: mars0.2.y (already in use for beta)
|
|
||||||
- Stable tagnames: mars0.2stable%02d (planned)
|
|
||||||
|
|
||||||
mars0.3 series (planned):
|
|
||||||
(some might possibly go to 1.0 series instead)
|
|
||||||
- Improve replication latency.
|
|
||||||
- New pseudo-synchronous replication modes.
|
|
||||||
For the internal needs of database folks at 1&1.
|
|
||||||
- (Maybe) old test suite could be retired, a new
|
|
||||||
one is at github.com/schoebel/test-suite
|
|
||||||
- Unstable tagnames: mars0.3beta%d.%d (planned)
|
|
||||||
- Stable branch: mars0.3.y (planned)
|
|
||||||
- Stable tagnames: mars0.3stable%02d (planned)
|
|
||||||
|
|
||||||
mars1.0 series (planned):
|
mars1.0 series (planned):
|
||||||
- Replace symlink tree by transactional status files
|
- Replace symlink tree by transactional status files
|
||||||
(future-proof)
|
(future-proof)
|
||||||
|
@ -130,16 +72,6 @@ Release Conventions / Branches / Tagnames
|
||||||
necessary for a bugfix, or for an important usability improvement
|
necessary for a bugfix, or for an important usability improvement
|
||||||
(such as clearer display of errors, hints for resolving them, etc).
|
(such as clearer display of errors, hints for resolving them, etc).
|
||||||
|
|
||||||
-----------------------------------
|
|
||||||
Changelog for series 0.2:
|
|
||||||
|
|
||||||
(you need to checkout branch mars0.2.y to see any details)
|
|
||||||
|
|
||||||
-----------------------------------
|
|
||||||
Changelog for series 0.1b:
|
|
||||||
|
|
||||||
(you need to checkout branch mars0.1b.y to see any details)
|
|
||||||
|
|
||||||
-----------------------------------
|
-----------------------------------
|
||||||
Changelog for series 0.1a:
|
Changelog for series 0.1a:
|
||||||
|
|
||||||
|
@ -401,6 +333,12 @@ Attention! This branch will go EOL around March 2019.
|
||||||
And even more stable, although the 0.1a releases were
|
And even more stable, although the 0.1a releases were
|
||||||
called "beta" up to now.
|
called "beta" up to now.
|
||||||
|
|
||||||
|
mars0.1stable72
|
||||||
|
* Minor fix: writeback improved in a corner case.
|
||||||
|
* Minor improvement: display WriteBack data amount in
|
||||||
|
marsadm view.
|
||||||
|
* Major doc improvement: describe IO performance tuning.
|
||||||
|
|
||||||
mars0.1stable71
|
mars0.1stable71
|
||||||
* Major fix: writeback at the primary was unnecessarily
|
* Major fix: writeback at the primary was unnecessarily
|
||||||
slow at certain situations.
|
slow at certain situations.
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 53 KiB |
Binary file not shown.
After Width: | Height: | Size: 108 KiB |
Binary file not shown.
After Width: | Height: | Size: 91 KiB |
Binary file not shown.
After Width: | Height: | Size: 51 KiB |
Binary file not shown.
After Width: | Height: | Size: 82 KiB |
|
@ -147,7 +147,7 @@ tst@1und1.de
|
||||||
\end_layout
|
\end_layout
|
||||||
|
|
||||||
\begin_layout Date
|
\begin_layout Date
|
||||||
Version 0.1a-70
|
Version 0.1a-72
|
||||||
\end_layout
|
\end_layout
|
||||||
|
|
||||||
\begin_layout Lowertitleback
|
\begin_layout Lowertitleback
|
||||||
|
@ -15194,6 +15194,17 @@ For better performance, use newer MARS versions from branch
|
||||||
mars0.1a.y
|
mars0.1a.y
|
||||||
\family default
|
\family default
|
||||||
or later.
|
or later.
|
||||||
|
Check the tips and tricks from section
|
||||||
|
\begin_inset CommandInset ref
|
||||||
|
LatexCommand vref
|
||||||
|
reference "sec:IO-Performance-Tuning"
|
||||||
|
plural "false"
|
||||||
|
caps "false"
|
||||||
|
noprefix "false"
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
.
|
||||||
You may also play around with
|
You may also play around with
|
||||||
\family typewriter
|
\family typewriter
|
||||||
/proc/sys/mars/aio_sync_mode
|
/proc/sys/mars/aio_sync_mode
|
||||||
|
@ -23492,11 +23503,19 @@ natural races
|
||||||
\labelwidthstring 00.00.0000
|
\labelwidthstring 00.00.0000
|
||||||
|
|
||||||
\family typewriter
|
\family typewriter
|
||||||
WriteBack
|
WriteBack[
|
||||||
|
\emph on
|
||||||
|
amount
|
||||||
|
\emph default
|
||||||
|
]
|
||||||
\family default
|
\family default
|
||||||
(cf
|
(cf
|
||||||
\family typewriter
|
\family typewriter
|
||||||
%is-primary{}
|
%is-primary{}
|
||||||
|
\family default
|
||||||
|
and amount via
|
||||||
|
\family typewriter
|
||||||
|
%writeback-rest{}
|
||||||
\family default
|
\family default
|
||||||
) Appears only at actual primaries (whether designated or not), when the
|
) Appears only at actual primaries (whether designated or not), when the
|
||||||
writeback from the RAM buffer is active (see section
|
writeback from the RAM buffer is active (see section
|
||||||
|
@ -23506,7 +23525,13 @@ reference "sec:The-Transaction-Logger"
|
||||||
|
|
||||||
\end_inset
|
\end_inset
|
||||||
|
|
||||||
)
|
).
|
||||||
|
The
|
||||||
|
\emph on
|
||||||
|
amount
|
||||||
|
\emph default
|
||||||
|
is displayed in human readable form, and may be used for a very rough estimatio
|
||||||
|
n of recovery time after a primary crash.
|
||||||
\end_layout
|
\end_layout
|
||||||
|
|
||||||
\begin_layout Labeling
|
\begin_layout Labeling
|
||||||
|
@ -25185,7 +25210,35 @@ resize
|
||||||
|
|
||||||
\begin_layout Standard
|
\begin_layout Standard
|
||||||
\noindent
|
\noindent
|
||||||
The following macros are only meaningful for secondary nodes.
|
The following macros are only meaningful for resources in primary mode:
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Labeling
|
||||||
|
\labelwidthstring 00.00.0000
|
||||||
|
|
||||||
|
\family typewriter
|
||||||
|
writeback-rest
|
||||||
|
\family default
|
||||||
|
Show the amount of data which is already in the transaction logfile, but
|
||||||
|
has not yet been written back to the underlying disk.
|
||||||
|
This may be used for estimation of recovery time after a potential primary
|
||||||
|
crash.
|
||||||
|
The writeback buffer is explained by the graphics at
|
||||||
|
\begin_inset CommandInset ref
|
||||||
|
LatexCommand vref
|
||||||
|
reference "sec:The-Transaction-Logger"
|
||||||
|
plural "false"
|
||||||
|
caps "false"
|
||||||
|
noprefix "false"
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
The following macros are only meaningful for resources in secondary mode.
|
||||||
By information theoretic limits, they can only tell what is
|
By information theoretic limits, they can only tell what is
|
||||||
\emph on
|
\emph on
|
||||||
locally known
|
locally known
|
||||||
|
@ -44963,6 +45016,685 @@ replication networks
|
||||||
Tips and Tricks
|
Tips and Tricks
|
||||||
\end_layout
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Section
|
||||||
|
IO Performance Tuning
|
||||||
|
\begin_inset CommandInset label
|
||||||
|
LatexCommand label
|
||||||
|
name "sec:IO-Performance-Tuning"
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
There
|
||||||
|
\emph on
|
||||||
|
exist
|
||||||
|
\emph default
|
||||||
|
some use cases where MARS
|
||||||
|
\emph on
|
||||||
|
can
|
||||||
|
\emph default
|
||||||
|
deliver better IO performance than a raw block device.
|
||||||
|
However, this cannot be expected
|
||||||
|
\emph on
|
||||||
|
in general
|
||||||
|
\emph default
|
||||||
|
.
|
||||||
|
In some
|
||||||
|
\emph on
|
||||||
|
other
|
||||||
|
\emph default
|
||||||
|
cases the performance may be
|
||||||
|
\emph on
|
||||||
|
lower
|
||||||
|
\emph default
|
||||||
|
than with a
|
||||||
|
\emph on
|
||||||
|
single
|
||||||
|
\emph default
|
||||||
|
local raw device.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
For demonstration, we use the
|
||||||
|
\family typewriter
|
||||||
|
blkreplay
|
||||||
|
\family default
|
||||||
|
tool from
|
||||||
|
\begin_inset Flex URL
|
||||||
|
status open
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
http://blkreplay.org
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
and a load which has been captured from a
|
||||||
|
\series bold
|
||||||
|
real datacenter
|
||||||
|
\series default
|
||||||
|
(1&1 Ionos ShaHoLin = Shared Hosting Linux).
|
||||||
|
The load already contains a parallelism degree of 20 LXC containers running
|
||||||
|
in parallel at the same iron.
|
||||||
|
This corresponds to about 60,000 web spaces running on 20 Apache instances,
|
||||||
|
already in parallel.
|
||||||
|
In contrast to artificial benchmarks (like pure random IO or pure sequential
|
||||||
|
IO), this benchmark is much closer to real server operations, while
|
||||||
|
artificial benchmarks are not meaningful for practice in general, because
|
||||||
|
they can deviate from real server operations by
|
||||||
|
\emph on
|
||||||
|
factors
|
||||||
|
\emph default
|
||||||
|
or even by
|
||||||
|
\series bold
|
||||||
|
orders of magnitude
|
||||||
|
\series default
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
In order to determine the limits of the test candidates, the timing of the
|
||||||
|
original workload was converted to a linear ramp-up, simulating an
|
||||||
|
\series bold
|
||||||
|
overloaded
|
||||||
|
\series default
|
||||||
|
system.
|
||||||
|
Otherwise benchmarking would not be possible.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
The following
|
||||||
|
\family typewriter
|
||||||
|
blkreplay
|
||||||
|
\family default
|
||||||
|
benchmarks were executed on an otherwise unloaded Dell R630 with 40 CPU
|
||||||
|
threads on 2 sockets, 192 GB RAM, a Dell R730 hardware RAID controller
|
||||||
|
with 2 GB BBU cache, and 10 spindles Dell 1.8 TB 2.5 inch SAS disks configured
|
||||||
|
as RAID-6.
|
||||||
|
All data, including the
|
||||||
|
\family typewriter
|
||||||
|
/mars
|
||||||
|
\family default
|
||||||
|
directory, was located on the hardware RAID via LVM2.
|
||||||
|
|
||||||
|
\family typewriter
|
||||||
|
/dev/vginfong/lv-0
|
||||||
|
\family default
|
||||||
|
was assigned a size of 8 TiB.
|
||||||
|
For testing, vanilla kernel 4.9.x with the MARS pre-patch and
|
||||||
|
\family typewriter
|
||||||
|
mars0.1astable72
|
||||||
|
\family default
|
||||||
|
was used.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
The
|
||||||
|
\family typewriter
|
||||||
|
blkreplay
|
||||||
|
\family default
|
||||||
|
parameters were as follows:
|
||||||
|
\begin_inset listings
|
||||||
|
inline false
|
||||||
|
status open
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
output_label="MARS"
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
# input description
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
input_file_list="http://blkreplay.org/loads/natural/1and1/shared-hosting/2016/Sha
|
||||||
|
HoLin_from_bare_metal/x20/shaholin-x20-ramped/shaholin-x20.adjacent.ramped-100.load.
|
||||||
|
gz"
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
replay_duration=110
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
speedup=10
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
threads=512
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
cmode=with-conflicts
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
scheduler="noop"
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
# hardware setup
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
replay_host_list="icpu5133"
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
replay_device_list="/dev/vginfong/lv-0"
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
# output description
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
enable_graph=1
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
|
||||||
|
graph_options="--no-static --dynamic"
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
We start with the
|
||||||
|
\series bold
|
||||||
|
raw
|
||||||
|
\series default
|
||||||
|
device
|
||||||
|
\family typewriter
|
||||||
|
/dev/vginfong/lv-0
|
||||||
|
\family default
|
||||||
|
which had a size of 8 TiB.
|
||||||
|
The throughput is about 1418 IOPS, and the latency diagram shows that the
|
||||||
|
system is overloaded, but can cope with that overload:
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\align center
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/blkreplay/MARS.MARS.raw.iosched-noop.nr_request-128.icpu5133.vginfong.lv-0.g01.latency.realtime.png
|
||||||
|
width 100col%
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
As you can see in the filename, the NOOP kernel IO scheduler was used, and
|
||||||
|
the kernel parameter
|
||||||
|
\family typewriter
|
||||||
|
nr_requests
|
||||||
|
\family default
|
||||||
|
was left at its default value of 128.
|
||||||
|
When you read the specs of the Dell R730 hardware RAID controller, you
|
||||||
|
will notice that it can handle a much higher IO request parallelism of
|
||||||
|
almost 1024 requests in parallel.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
So the first natural tuning attempt is
|
||||||
|
\family typewriter
|
||||||
|
nr_requests=1020
|
||||||
|
\family default
|
||||||
|
, in order to release the
|
||||||
|
\begin_inset Quotes eld
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
kernel IO handbrake
|
||||||
|
\begin_inset Quotes erd
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
.
|
||||||
|
This results in an improved throughput of 1562 IOPS, and even the
|
||||||
|
\emph on
|
||||||
|
maximum
|
||||||
|
\emph default
|
||||||
|
latencies are improved, but the
|
||||||
|
\emph on
|
||||||
|
average
|
||||||
|
\emph default
|
||||||
|
latencies are becoming a little bit worse:
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\align center
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/blkreplay/MARS.MARS.raw.iosched-noop.nr_request-1020.icpu5133.vginfong.lv-0.g01.latency.realtime.png
|
||||||
|
width 100col%
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
It has been well known for decades that there is a fundamental tradeoff between
|
||||||
|
throughput and latencies in IO systems.
|
||||||
|
Thus it is not a surprising result.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
On servers, overload situations should be rare, and during overload throughput
|
||||||
|
is typically much more important than latencies, as long as latencies are
|
||||||
|
not exceedingly high.
|
||||||
|
Thus we can recommend
|
||||||
|
\family typewriter
|
||||||
|
nr_requests=1000
|
||||||
|
\family default
|
||||||
|
for production.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
However, some sysadmins might be tempted to question why the NOOP scheduler
|
||||||
|
has been used.
|
||||||
|
On the internet, there are a ton of claims that CFQ is much better.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Well, testing with CFQ instead of NOOP is no problem for
|
||||||
|
\family typewriter
|
||||||
|
blkreplay
|
||||||
|
\family default
|
||||||
|
.
|
||||||
|
However, the result is very surprising.
|
||||||
|
While the IOPS are 1539, which is only a slight decrease which could result
|
||||||
|
from measurement tolerances, the latencies are now turning almost into
|
||||||
|
a disaster:
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\align center
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/blkreplay/MARS.MARS.raw.iosched-cfq.nr_request-1020.icpu5133.vginfong.lv-0.g01.latency.realtime.png
|
||||||
|
width 100col%
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
In production, you should never encounter IO latencies of almost 15 seconds.
|
||||||
|
So what is going wrong here?
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Here is an explanation.
|
||||||
|
A hardware RAID controller
|
||||||
|
\emph on
|
||||||
|
already
|
||||||
|
\emph default
|
||||||
|
has an
|
||||||
|
\emph on
|
||||||
|
internal
|
||||||
|
\emph default
|
||||||
|
IO scheduler.
|
||||||
|
This IO scheduler is hidden in a black box, such that many sysadmins don't
|
||||||
|
know of its existence.
|
||||||
|
If you add another IO scheduler at kernel level, you will have
|
||||||
|
\series bold
|
||||||
|
two different
|
||||||
|
\series default
|
||||||
|
IO schedulers running in parallel, and sometimes taking
|
||||||
|
\series bold
|
||||||
|
contradictory decisions
|
||||||
|
\series default
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
These contradictory scheduling decisions may lead to problems in certain
|
||||||
|
cases and scenarios.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
While kernel-level IO schedulers like CFQ certainly have their merits at
|
||||||
|
improving your workstation's IO behaviour, they are counter-productive
|
||||||
|
at servers with hardware RAID controllers.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
So the advice is clear:
|
||||||
|
\series bold
|
||||||
|
switch them off
|
||||||
|
\series default
|
||||||
|
|
||||||
|
\emph on
|
||||||
|
in such a case
|
||||||
|
\emph default
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Even if you have a software RAID, check with
|
||||||
|
\family typewriter
|
||||||
|
blkreplay
|
||||||
|
\family default
|
||||||
|
that any IO schedulers are
|
||||||
|
\emph on
|
||||||
|
really
|
||||||
|
\emph default
|
||||||
|
improving things.
|
||||||
|
When possible, use your real workload, captured with
|
||||||
|
\family typewriter
|
||||||
|
blktrace
|
||||||
|
\family default
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/MatieresCorrosives.png
|
||||||
|
lyxscale 50
|
||||||
|
scale 17
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
Never use a benchmark which only delivers IOPS! As demonstrated, inappropriate
|
||||||
|
IOPS tuning (or choice of inappropriate components) can worsen latencies
|
||||||
|
so much that production can be endangered!
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/lightbulb_brightlit_benj_.png
|
||||||
|
lyxscale 12
|
||||||
|
scale 7
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
Always look at
|
||||||
|
\emph on
|
||||||
|
both
|
||||||
|
\emph default
|
||||||
|
IOPS
|
||||||
|
\emph on
|
||||||
|
and
|
||||||
|
\emph default
|
||||||
|
latencies!
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/MatieresCorrosives.png
|
||||||
|
lyxscale 50
|
||||||
|
scale 17
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\emph on
|
||||||
|
Average
|
||||||
|
\emph default
|
||||||
|
latencies, even when enriched with
|
||||||
|
\emph on
|
||||||
|
standard deviation
|
||||||
|
\emph default
|
||||||
|
, are not enough.
|
||||||
|
Classical statistics does not clearly describe operational problems like
|
||||||
|
|
||||||
|
\series bold
|
||||||
|
hangs
|
||||||
|
\series default
|
||||||
|
and
|
||||||
|
\series bold
|
||||||
|
exceptionally high latency requests
|
||||||
|
\series default
|
||||||
|
, which may occur only rarely, but can then lead to
|
||||||
|
\series bold
|
||||||
|
serious incidents
|
||||||
|
\series default
|
||||||
|
.
|
||||||
|
Use a tool which can clearly display
|
||||||
|
\emph on
|
||||||
|
any
|
||||||
|
\emph default
|
||||||
|
faulty behaviour, such as
|
||||||
|
\family typewriter
|
||||||
|
blkreplay
|
||||||
|
\family default
|
||||||
|
's
|
||||||
|
\series bold
|
||||||
|
latency diagrams
|
||||||
|
\series default
|
||||||
|
!
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Now we come to benchmarking
|
||||||
|
\family typewriter
|
||||||
|
/dev/mars/lv-0
|
||||||
|
\family default
|
||||||
|
placed on top of
|
||||||
|
\family typewriter
|
||||||
|
/dev/vginfong/lv-0
|
||||||
|
\family default
|
||||||
|
.
|
||||||
|
Notice that MARS needs to write all write requests twice: once into the
|
||||||
|
transaction logfile, and a second time by writeback into
|
||||||
|
\family typewriter
|
||||||
|
/dev/vginfong/lv-0
|
||||||
|
\family default
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
So you might expect that the performance of
|
||||||
|
\family typewriter
|
||||||
|
/dev/mars/lv-0
|
||||||
|
\family default
|
||||||
|
could be worse than at the underlying raw device.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Nevertheless, the
|
||||||
|
\series bold
|
||||||
|
throughput
|
||||||
|
\series default
|
||||||
|
is now measured 4338 IOPS, which means that performance has
|
||||||
|
\series bold
|
||||||
|
more than doubled
|
||||||
|
\series default
|
||||||
|
.
|
||||||
|
You can also see it by the duration of the benchmark at the x axis.
|
||||||
|
Even the latencies have improved in many cases:
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\align center
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/blkreplay/MARS.MARS.mars.iosched-noop.nr_request-1020.icpu5133.mars.lv-0.g01.latency.realtime.png
|
||||||
|
width 100col%
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
How is it possible to be faster than a RAW device? How can this be explained?
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Look at the graphics and at the explanations from section
|
||||||
|
\begin_inset CommandInset ref
|
||||||
|
LatexCommand vref
|
||||||
|
reference "sec:The-Transaction-Logger"
|
||||||
|
plural "false"
|
||||||
|
caps "false"
|
||||||
|
noprefix "false"
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
.
|
||||||
|
The key to local IO performance is the
|
||||||
|
\series bold
|
||||||
|
re-ordering of writeback
|
||||||
|
\series default
|
||||||
|
according to ascending sector numbers.
|
||||||
|
This can reduce mechanical seek times of hard disks considerably, and even
|
||||||
|
by factors, such that it can over-compensate the doubled writes to the
|
||||||
|
transaction logfile, and even when both are residing at the same RAID set.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Notice: this effect is not only dependent on total RAM size and on the
|
||||||
|
maximum size of the MARS temporary memory buffer (tuning parameter
|
||||||
|
\family typewriter
|
||||||
|
/proc/sys/mars/mars_mem_percent
|
||||||
|
\family default
|
||||||
|
which defaults to a limit of 20%).
|
||||||
|
It is also highly dependent on the actual seek behaviour of the
|
||||||
|
\series bold
|
||||||
|
workload
|
||||||
|
\series default
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
For example, if you use
|
||||||
|
\family typewriter
|
||||||
|
dd
|
||||||
|
\family default
|
||||||
|
for sequentially overwriting /dev/mars/lv-0 with a parallelism degree of
|
||||||
|
1, the writeback optimization of MARS cannot be exploited.
|
||||||
|
However,
|
||||||
|
\family typewriter
|
||||||
|
dd
|
||||||
|
\family default
|
||||||
|
is not an appropriate benchmarking tool, and has almost nothing to do with
|
||||||
|
real workloads occurring in datacenters, which typically are neither sequential,
|
||||||
|
nor do they have a parallelism degree of only 1.
|
||||||
|
Please don't try to lead any discussions about this: simply use
|
||||||
|
\family typewriter
|
||||||
|
blktrace
|
||||||
|
\family default
|
||||||
|
to capture your real server workload, and compare it to a run of dd.
|
||||||
|
Only if you encounter the same behaviour as
|
||||||
|
\family typewriter
|
||||||
|
dd
|
||||||
|
\family default
|
||||||
|
, only then you can really claim that your workload is like
|
||||||
|
\family typewriter
|
||||||
|
dd
|
||||||
|
\family default
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/MatieresCorrosives.png
|
||||||
|
lyxscale 50
|
||||||
|
scale 17
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
Any assumptions about workloads are very dangerous: they can deviate from
|
||||||
|
practice not only by factors, but sometimes even by
|
||||||
|
\emph on
|
||||||
|
orders of magnitude
|
||||||
|
\emph default
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Notice: the writeback optimization of MARS can typically only improve performanc
|
||||||
|
e of HDDs, but not of SSDs.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/lightbulb_brightlit_benj_.png
|
||||||
|
lyxscale 12
|
||||||
|
scale 7
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
By placing
|
||||||
|
\family typewriter
|
||||||
|
/mars
|
||||||
|
\family default
|
||||||
|
onto its own physical device with appropriate speed, you can compensate
|
||||||
|
the doubled writes to some degree.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\noindent
|
||||||
|
\begin_inset Graphics
|
||||||
|
filename images/lightbulb_brightlit_benj_.png
|
||||||
|
lyxscale 12
|
||||||
|
scale 7
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
Depending on the workload and on RAID parameters,
|
||||||
|
\family typewriter
|
||||||
|
/mars
|
||||||
|
\family default
|
||||||
|
may be better placed onto SSDs, or better be placed on HDDs.
|
||||||
|
There is no general rule.
|
||||||
|
Just use
|
||||||
|
\family typewriter
|
||||||
|
blktrace
|
||||||
|
\family default
|
||||||
|
on your real workload, and check several configuration alternatives (also
|
||||||
|
different RAID levels etc) with
|
||||||
|
\family typewriter
|
||||||
|
blkreplay
|
||||||
|
\family default
|
||||||
|
.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
\begin_layout Section
|
\begin_layout Section
|
||||||
Avoiding Inappropriate Clustermanager Types for Medium and Long-Distance
|
Avoiding Inappropriate Clustermanager Types for Medium and Long-Distance
|
||||||
Replication
|
Replication
|
||||||
|
|
|
@ -86,7 +86,6 @@ struct trans_logger_hash_anchor {
|
||||||
///////////////////////// global tuning ////////////////////////
|
///////////////////////// global tuning ////////////////////////
|
||||||
|
|
||||||
int trans_logger_completion_semantics = 1;
|
int trans_logger_completion_semantics = 1;
|
||||||
EXPORT_SYMBOL_GPL(trans_logger_completion_semantics);
|
|
||||||
|
|
||||||
int trans_logger_do_crc =
|
int trans_logger_do_crc =
|
||||||
#ifdef CONFIG_MARS_DEBUG
|
#ifdef CONFIG_MARS_DEBUG
|
||||||
|
@ -94,26 +93,22 @@ int trans_logger_do_crc =
|
||||||
#else
|
#else
|
||||||
false;
|
false;
|
||||||
#endif
|
#endif
|
||||||
EXPORT_SYMBOL_GPL(trans_logger_do_crc);
|
|
||||||
|
|
||||||
int trans_logger_mem_usage; // in KB
|
int trans_logger_mem_usage; // in KB
|
||||||
EXPORT_SYMBOL_GPL(trans_logger_mem_usage);
|
|
||||||
|
int trans_logger_pressure_limit = 0;
|
||||||
|
|
||||||
int trans_logger_max_interleave = -1;
|
int trans_logger_max_interleave = -1;
|
||||||
EXPORT_SYMBOL_GPL(trans_logger_max_interleave);
|
|
||||||
|
|
||||||
int trans_logger_resume = 1;
|
int trans_logger_resume = 1;
|
||||||
EXPORT_SYMBOL_GPL(trans_logger_resume);
|
|
||||||
|
|
||||||
int trans_logger_replay_timeout = 1; // in s
|
int trans_logger_replay_timeout = 1; // in s
|
||||||
EXPORT_SYMBOL_GPL(trans_logger_replay_timeout);
|
|
||||||
|
|
||||||
struct writeback_group global_writeback = {
|
struct writeback_group global_writeback = {
|
||||||
.mutex = __RWSEM_INITIALIZER(global_writeback.mutex),
|
.mutex = __RWSEM_INITIALIZER(global_writeback.mutex),
|
||||||
.group_anchor = LIST_HEAD_INIT(global_writeback.group_anchor),
|
.group_anchor = LIST_HEAD_INIT(global_writeback.group_anchor),
|
||||||
.until_percent = 30,
|
.until_percent = 30,
|
||||||
};
|
};
|
||||||
EXPORT_SYMBOL_GPL(global_writeback);
|
|
||||||
|
|
||||||
static
|
static
|
||||||
void add_to_group(struct writeback_group *gr, struct trans_logger_brick *brick)
|
void add_to_group(struct writeback_group *gr, struct trans_logger_brick *brick)
|
||||||
|
@ -2311,13 +2306,28 @@ struct rank_info global_rank_mref_flying[] = {
|
||||||
{ RKI_DUMMY }
|
{ RKI_DUMMY }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Not checking pressure means that writeback pressure is always assumed
|
||||||
|
* by default. No pressure means that writeback may be postponed
|
||||||
|
* when other IO is more important.
|
||||||
|
* Returns true only if the sum of any_fly_count and the q_queued and
* q_active counters of q_phase[0] exceeds trans_logger_pressure_limit
* and brick->power.button is set.
*/
|
||||||
|
static inline
|
||||||
|
bool _check_pressure(struct trans_logger_brick *brick)
|
||||||
|
{
|
||||||
|
int active =
|
||||||
|
atomic_read(&brick->any_fly_count) +
|
||||||
|
brick->q_phase[0].q_queued + brick->q_phase[0].q_active;
|
||||||
|
|
||||||
|
return (active > trans_logger_pressure_limit) &&
|
||||||
|
brick->power.button;
|
||||||
|
}
|
||||||
|
|
||||||
static noinline
|
static noinline
|
||||||
int _do_ranking(struct trans_logger_brick *brick)
|
int _do_ranking(struct trans_logger_brick *brick)
|
||||||
{
|
{
|
||||||
struct rank_data *rkd = brick->rkd;
|
struct rank_data *rkd = brick->rkd;
|
||||||
int res;
|
int res;
|
||||||
int i;
|
int i;
|
||||||
int floating_mode;
|
int pressure_mode;
|
||||||
int mref_flying;
|
int mref_flying;
|
||||||
bool delay_callers;
|
bool delay_callers;
|
||||||
|
|
||||||
|
@ -2325,15 +2335,13 @@ int _do_ranking(struct trans_logger_brick *brick)
|
||||||
|
|
||||||
// check the memory situation...
|
// check the memory situation...
|
||||||
delay_callers = false;
|
delay_callers = false;
|
||||||
floating_mode = 1;
|
pressure_mode = 1;
|
||||||
if (atomic_read(&brick->any_fly_count) +
|
if (brick_global_memlimit >= 1024) {
|
||||||
brick->q_phase[0].q_queued + brick->q_phase[0].q_active <= 0) {
|
|
||||||
/* do not change floating_mode */
|
|
||||||
} else if (brick_global_memlimit >= 1024) {
|
|
||||||
int global_mem_used = atomic64_read(&global_mshadow_used) / 1024;
|
int global_mem_used = atomic64_read(&global_mshadow_used) / 1024;
|
||||||
trans_logger_mem_usage = global_mem_used;
|
trans_logger_mem_usage = global_mem_used;
|
||||||
|
|
||||||
floating_mode = (global_mem_used < brick_global_memlimit / 2) ? 0 : 1;
|
if (_check_pressure(brick))
|
||||||
|
pressure_mode = (global_mem_used < brick_global_memlimit / 2) ? 0 : 1;
|
||||||
|
|
||||||
if (global_mem_used >= brick_global_memlimit)
|
if (global_mem_used >= brick_global_memlimit)
|
||||||
delay_callers = true;
|
delay_callers = true;
|
||||||
|
@ -2342,7 +2350,8 @@ int _do_ranking(struct trans_logger_brick *brick)
|
||||||
} else if (brick->shadow_mem_limit >= 8) {
|
} else if (brick->shadow_mem_limit >= 8) {
|
||||||
int local_mem_used = atomic64_read(&brick->shadow_mem_used) / 1024;
|
int local_mem_used = atomic64_read(&brick->shadow_mem_used) / 1024;
|
||||||
|
|
||||||
floating_mode = (local_mem_used < brick->shadow_mem_limit / 2) ? 0 : 1;
|
if (_check_pressure(brick))
|
||||||
|
pressure_mode = (local_mem_used < brick->shadow_mem_limit / 2) ? 0 : 1;
|
||||||
|
|
||||||
if (local_mem_used >= brick->shadow_mem_limit)
|
if (local_mem_used >= brick->shadow_mem_limit)
|
||||||
delay_callers = true;
|
delay_callers = true;
|
||||||
|
@ -2403,7 +2412,7 @@ int _do_ranking(struct trans_logger_brick *brick)
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
// limit mref IO parallelism on transaction log
|
// limit mref IO parallelism on transaction log
|
||||||
ranking_compute(&rkd[0], extra_rank_mref_flying, mref_flying);
|
ranking_compute(&rkd[0], extra_rank_mref_flying, mref_flying);
|
||||||
} else if (i == 1 && !floating_mode) {
|
} else if (i == 1 && !pressure_mode) {
|
||||||
struct trans_logger_brick *leader;
|
struct trans_logger_brick *leader;
|
||||||
int lim;
|
int lim;
|
||||||
|
|
||||||
|
@ -2439,13 +2448,13 @@ int _do_ranking(struct trans_logger_brick *brick)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ranking_compute(&rkd[i], queue_ranks[floating_mode][i], queued);
|
ranking_compute(&rkd[i], queue_ranks[pressure_mode][i], queued);
|
||||||
|
|
||||||
flying = brick->q_phase[i].q_active - brick->q_phase[i].q_active;
|
flying = brick->q_phase[i].q_active - brick->q_phase[i].q_active;
|
||||||
|
|
||||||
MARS_IO("i = %d queued = %d flying = %d\n", i, queued, flying);
|
MARS_IO("i = %d queued = %d flying = %d\n", i, queued, flying);
|
||||||
|
|
||||||
ranking_compute(&rkd[i], fly_ranks[floating_mode][i], flying);
|
ranking_compute(&rkd[i], fly_ranks[pressure_mode][i], flying);
|
||||||
}
|
}
|
||||||
|
|
||||||
// finalize it
|
// finalize it
|
||||||
|
|
|
@ -50,6 +50,7 @@
|
||||||
extern int trans_logger_completion_semantics;
|
extern int trans_logger_completion_semantics;
|
||||||
extern int trans_logger_do_crc;
|
extern int trans_logger_do_crc;
|
||||||
extern int trans_logger_mem_usage; // in KB
|
extern int trans_logger_mem_usage; // in KB
|
||||||
|
extern int trans_logger_pressure_limit;
|
||||||
extern int trans_logger_max_interleave;
|
extern int trans_logger_max_interleave;
|
||||||
extern int trans_logger_resume;
|
extern int trans_logger_resume;
|
||||||
extern int trans_logger_replay_timeout; // in s
|
extern int trans_logger_replay_timeout; // in s
|
||||||
|
|
|
@ -354,6 +354,7 @@ struct ctl_table mars_table[] = {
|
||||||
INT_ENTRY("delay_say_on_overflow",delay_say_on_overflow, 0600),
|
INT_ENTRY("delay_say_on_overflow",delay_say_on_overflow, 0600),
|
||||||
INT_ENTRY("mapfree_period_sec", mapfree_period_sec, 0600),
|
INT_ENTRY("mapfree_period_sec", mapfree_period_sec, 0600),
|
||||||
INT_ENTRY("mapfree_grace_keep_mb", mapfree_grace_keep_mb, 0600),
|
INT_ENTRY("mapfree_grace_keep_mb", mapfree_grace_keep_mb, 0600),
|
||||||
|
INT_ENTRY("logger_pressure_limit", trans_logger_pressure_limit, 0600),
|
||||||
INT_ENTRY("logger_max_interleave", trans_logger_max_interleave, 0600),
|
INT_ENTRY("logger_max_interleave", trans_logger_max_interleave, 0600),
|
||||||
INT_ENTRY("logger_resume", trans_logger_resume, 0600),
|
INT_ENTRY("logger_resume", trans_logger_resume, 0600),
|
||||||
INT_ENTRY("logger_replay_timeout_sec", trans_logger_replay_timeout, 0600),
|
INT_ENTRY("logger_replay_timeout_sec", trans_logger_replay_timeout, 0600),
|
||||||
|
|
|
@ -4387,6 +4387,7 @@ sub eval_fn {
|
||||||
if (/^is[-_]?orphan$/) {
|
if (/^is[-_]?orphan$/) {
|
||||||
my $peer = parse_macro($arg1, $env);
|
my $peer = parse_macro($arg1, $env);
|
||||||
$peer = $$env{"host"} unless $peer;
|
$peer = $$env{"host"} unless $peer;
|
||||||
|
return 0 if eval_fn($env, "is-primary", $peer);
|
||||||
my $replay = get_link($$env{"resdir"} . "/replay-$peer", 1);
|
my $replay = get_link($$env{"resdir"} . "/replay-$peer", 1);
|
||||||
$replay =~ m/^(log-[^,]+),([0-9]*)/;
|
$replay =~ m/^(log-[^,]+),([0-9]*)/;
|
||||||
my $logfile = $$env{"resdir"} . "/" . $1;
|
my $logfile = $$env{"resdir"} . "/" . $1;
|
||||||
|
@ -4475,7 +4476,9 @@ sub eval_fn {
|
||||||
my $what = $1;
|
my $what = $1;
|
||||||
my $is = "is";
|
my $is = "is";
|
||||||
$is = "has" if $what eq "emergency";
|
$is = "has" if $what eq "emergency";
|
||||||
my $lnk = $$env{"resdir"} . "/actual-" . $$env{"host"} . "/$is-$what";
|
my $peer = parse_macro($arg1, $env);
|
||||||
|
$peer = $$env{"host"} unless $peer;
|
||||||
|
my $lnk = $$env{"resdir"} . "/actual-$peer/$is-$what";
|
||||||
$lnk = correct_path($lnk);
|
$lnk = correct_path($lnk);
|
||||||
return get_link($lnk, 1);
|
return get_link($lnk, 1);
|
||||||
}
|
}
|
||||||
|
@ -4568,6 +4571,13 @@ sub eval_fn {
|
||||||
my $what = $1;
|
my $what = $1;
|
||||||
return eval_fn($env, "$what-lognr", "") - eval_fn($env, "replay-lognr", "");
|
return eval_fn($env, "$what-lognr", "") - eval_fn($env, "replay-lognr", "");
|
||||||
}
|
}
|
||||||
|
if (/^writeback[-_]?rest$/) {
|
||||||
|
my $lnk = $$env{"resdir"} . "/replay-" . $$env{"host"};
|
||||||
|
my $link = get_link($lnk, 1);
|
||||||
|
$link =~ m/,([0-9]+)$/;
|
||||||
|
return $1 if defined($1);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
if (/^(replay|work)[-_]?(pos)$/) {
|
if (/^(replay|work)[-_]?(pos)$/) {
|
||||||
my $what = $1;
|
my $what = $1;
|
||||||
my $op = $2;
|
my $op = $2;
|
||||||
|
@ -5049,7 +5059,7 @@ my %complex_macros =
|
||||||
. "%elsif{%not{%todo-primary{}}}{"
|
. "%elsif{%not{%todo-primary{}}}{"
|
||||||
. "InConsistent"
|
. "InConsistent"
|
||||||
. "}{%is-primary{}}{"
|
. "}{%is-primary{}}{"
|
||||||
. "WriteBack"
|
. "WriteBack[%human-numbers{}{ }{ }{%writeback-rest{}}]"
|
||||||
. "}{"
|
. "}{"
|
||||||
. "Recovery"
|
. "Recovery"
|
||||||
. "}"
|
. "}"
|
||||||
|
@ -5057,7 +5067,7 @@ my %complex_macros =
|
||||||
. "%elsif{%not{%todo-primary{}}}{"
|
. "%elsif{%not{%todo-primary{}}}{"
|
||||||
. "OutDated[%call{outdated-flags}]"
|
. "OutDated[%call{outdated-flags}]"
|
||||||
. "}{%is-primary{}}{"
|
. "}{%is-primary{}}{"
|
||||||
. "WriteBack"
|
. "WriteBack[%human-numbers{}{ }{ }{%writeback-rest{}}]"
|
||||||
. "}{"
|
. "}{"
|
||||||
. "Recovery"
|
. "Recovery"
|
||||||
. "}"
|
. "}"
|
||||||
|
@ -5479,6 +5489,8 @@ my %trivial_globs =
|
||||||
=> "",
|
=> "",
|
||||||
"{sync,fetch,replay,work}-{rest,{almost-,threshold-,}reached,percent,permille,vector}"
|
"{sync,fetch,replay,work}-{rest,{almost-,threshold-,}reached,percent,permille,vector}"
|
||||||
=> "",
|
=> "",
|
||||||
|
"writeback-rest"
|
||||||
|
=> "",
|
||||||
"{sync,fetch,replay}-{rate,remain}"
|
"{sync,fetch,replay}-{rate,remain}"
|
||||||
=> "",
|
=> "",
|
||||||
"replay-basenr"
|
"replay-basenr"
|
||||||
|
|
Loading…
Reference in New Issue