test_suite: new and updated test cases

This commit is contained in:
Frank Liepold 2013-09-17 12:41:17 +02:00 committed by Thomas Schoebel-Theuer
parent 08e5803cd1
commit 6b41af4cd9
72 changed files with 6888 additions and 502 deletions

View File

@ -43,3 +43,18 @@
- 'size' link is actual state
- no target state
*5. memleak: marsadm secondary produces memleaks if executed during "flying writes"
(test /home/fl/mars/test_suite/test_cases/bugs/memleak)
Fixed:
commit 281fa300b27d574b2c11066bf2c4f6114ade1325
Author: Frank Liepold <frank.liepold@1und1.de> 2013-09-16 12:53:22
Branch: master
Follows: light0.1beta0.12
Precedes:
light: workaround flying IO before reporting memory leaks
6. marsadm invalidate does not repair secondary after mars dir full
(test /home/fl/mars/test_suite/test_cases/hardcore/mars_dir_full)

View File

@ -1,7 +1,7 @@
#!/bin/bash
verbose_script=1
checkout_mars_src_directory=/home/fl/mars/mars-testing
checkout_mars_git_branch=master
checkout_mars_git_branch=WIP-transferstatus
checkout_mars_git_tag=
checkout_mars_kernel_src_directory=/home/fl/mars/linux-infong-3.2
checkout_mars_kernel_git_branch=infong-3.2.47-tst-1

View File

@ -18,5 +18,5 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
verbose_script=1
prepare_list="resource_leave_all lv_config_prepare"
prepare_list="lv_config_prepare"
run_list="lv_config_run"

View File

@ -22,7 +22,9 @@ Dateien
Doku:
- Alle globalen Variablen in .conf - Dateien
- Die Reihenfolge der default-*.conf - Dateien ist u.U. wichtig, wenn die
gegenseitig Variable referenziert werden!!
gegenseitig Variable referenziert werden!! Daher sollte hier eine
entsprechende <modul>_set_globals Funktion definiert sein (siehe z.B.
resource_set_globals)
- Alle globalen Variablen und alle Funktionen mit Modulname als Präfix
- Globale Arrays muessen mit declare -g deklariert werden, da die Variablen via
source innerhalb einer Funktion bekannt gemacht werden

View File

@ -17,5 +17,5 @@ declare -g -A cluster_mars_dir_lv_name_list
eval cluster_mars_dir_lv_name_list=($(x=(${main_host_list[@]/#/[}); y=(${x[@]/%/]=lv-6-100}); echo ${y[@]}))
#
cluster_debugfiles=("$main_mars_directory/5.total.log" )
cluster_debugfile="$main_mars_directory/5.total.log"

View File

@ -18,4 +18,8 @@ lib_rw_file_to_write="file_to_write"
lib_rw_write_and_delete_script="/tmp/write_and_delete.sh.$$"
## part of data device size, which is written in the dd statement in the
## script which writes the data device
lib_rw_part_of_device_size_written_per_loop=4
lib_rw_number_of_umount_retries_after_stopped_write=4

View File

@ -18,19 +18,23 @@
## Only logical volumes which do not exist or which have a wrong size will
## be recreated.
## the names are built due to the pattern lv-<i>-<size>, where i runs from
## 0 to $resource_count - 1
## the names are built due to the pattern lv-<i>-<size>
lv_config_name_list=(lv-1-2 lv-2-2 lv-3-2 lv-4-2 lv-5-10 lv-6-100)
lv_config_min_lvg_size=200 # GB
lv_config_min_lvg_size=100 # GB
lv_config_host_list[0]="no_target_hosts_given"
## lv_config_delete_lvs_not_needed
##
## if set, all logical volumes belonging to volume group main_lvg_name will
## be deleted.
lv_config_delete_lvs_not_needed=1
declare -g -A lv_config_partition_count_list
lv_config_partition_count_list=(\
[istore-test-bs7]=3 \
[istore-test-bap7]=3 \
)
declare -g -A lv_config_partition_list
lv_config_partition_list=(\
[istore-test-bs7]="$(eval echo /dev/cciss/c1d{0..${lv_config_partition_count_list["istore-test-bs7"]}})" \
[istore-test-bap7]="$(eval echo /dev/cciss/c1d{0..${lv_config_partition_count_list["istore-test-bap7"]}})" \
)
lv_config_stripesize=64K
## option for mkfs.<fs_type> call
declare -g -A lv_config_mkfs_option_list
@ -39,5 +43,5 @@ lv_config_mkfs_option_list=([xfs]="-f" [ext3]="" [ext4]="")
## filesystem specific tune commands
## the string <dev> will be replaced by the actual device name
declare -g -A lv_config_fs_type_tune_cmd_list
lv_config_fs_type_tune_cmd_list=([xfs]="" [ext3]="tune -c 0 <dev>" [ext4]="tune -c 0 <dev>")
lv_config_fs_type_tune_cmd_list=([xfs]="" [ext3]="tune2fs -c 0 <dev>" [ext4]="tune2fs -c 0 <dev>")

View File

@ -3,18 +3,21 @@
## the base directory of all .sh and .conf files
main_base_directory=/home/fl/mars/test_suite
## hosts the tests are running on
## hosts the tests are running on. The first host is initially always used as
## primary host
main_host_list=("istore-test-bs7" "istore-test-bap7")
## using lilo for all hosts
## (constructing the expression ([host1]=lilo [host2]=lilo))
declare -g -A main_host_bootloader_list
eval main_host_bootloader_list=($(x=(${main_host_list[@]/#/[}); y=(${x[@]/%/]=lilo}); echo ${y[@]}))
main_host_bootloader_list=(\
[istore-test-bs7]=grub \
[istore-test-bap7]=lilo \
)
## using lilo label mars for all hosts
declare -g -A main_host_bootloader_label_list
eval main_host_bootloader_label_list=($(x=(${main_host_list[@]/#/[}); y=(${x[@]/%/]=mars}); echo ${y[@]}))
main_host_bootloader_label_list=(\
[istore-test-bs7]=no_label \
[istore-test-bap7]=mars \
)
## associative array containing functions (as indexes) to be called in case of
## unexpected errors or signals. See also lib_exit

View File

View File

@ -0,0 +1,28 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
verbose_script=1
## size of logical volumes to be created
multires_lv_size=700
## number of logical volumes to be created
multires_lv_count=8

View File

@ -41,7 +41,7 @@ net_time_constant_apply=5
## time for which the amount of data to fetch must be constant to declare
## the fetch process as having stopped
net_time_constant_fetch=10
net_time_constant_fetch=20
## maxtime to wait for apply to stop (after pause-apply)
net_maxtime_apply=60

View File

@ -0,0 +1,152 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
## defaults for module perftest
## time for which the amount of data to sync must be constant
## to declare the sync process as inactive
perftest_time_constant_sync=20
## maxtime to wait for sync to stop
perftest_maxtime_sync=300
## flag whether the data device should be written simultaneously
perftest_parallel_writer=0
## amount of data in GB to be written to the data device
perftest_data_in_gb_to_write=10
## size of logfile (in GiB and rounded) after perftest_data_in_gb_to_write are written
perftest_logfile_size_in_gb=10
## time for which the amount of data to fetch must be constant to declare
## the fetch process as having stopped
perftest_time_constant_fetch=10
## maxtime to wait for fetch to stop (after pause-fetch)
perftest_maxtime_fetch=300
## maxtime to wait for apply to stop (after pause-apply)
perftest_maxtime_apply=100
## time for which the amount of data to apply must be constant to declare
## the apply process as having stopped
perftest_time_constant_apply=5
## error tag which will be printed if the runtime diff exceeds the
## required interval
perftest_errortag_result_out_of_bounds="PERFORMANCE-FAILURE"
## maxtime to wait for secondary to become disk state = Uptodate and
## repl state = Replaying in *prepare_for* functions
perftest_maxtime_state_constant=60
## array containing the required results (durations or rate (loops / minute)) of performance tests. A result consists of the
## required value and - comma separated - a tolerance given in percent.
## the index consists of host, action, flag whether a parallel writer to the data device is running, type of result,
## number of resources, sub testcase id
##
## for the sync tests the sub testcase id consists of: number of patches, patch length, sync mode
declare -g -A perftest_required_result_list
perftest_required_result_list=(\
["HOST","ACTION","PARALLEL_WRITER","RESULT_TYPE","NO_RESOURCES","GB_TO_FETCH_RESP_TO_APPLY"]=-1 \
["istore-test-bap7","apply",0,"time",1,"10"]=63,10 \
["istore-test-bap7","apply",1,"time",1,"10"]=63,10 \
["istore-test-bap7","fetch",0,"time",1,"10"]=155,10 \
["istore-test-bap7","fetch",1,"time",1,"10"]=223,10 \
["istore-test-bap7","fetch_and_apply",0,"time",1,"5"]=267,10 \
["istore-test-bap7","fetch_and_apply",1,"time",1,"5"]=336,10 \
["HOST","ACTION","PARALLEL_WRITER","RESULT_TYPE","NO_RESOURCES","GB_FETCHED_RESP_APPLIED"]=-1 \
["istore-test-bs7","write_while_apply",0,"loops_per_min",1,"10"]=16,10 \
["istore-test-bs7","write_while_fetch",0,"loops_per_min",1,"10"]=16,10 \
["istore-test-bs7","write_while_fetch_and_apply",0,"loops_per_min",1,"5"]=15,10 \
["HOST","ACTION","PARALLEL_WRITER","RESULT_TYPE","NO_RESOURCES","DURATION_OF_WRITE:DEVICE_SIZE:PART_OF_DEVICE_SIZE_WRITTEN_PER_LOOP"]=-1 \
["istore-test-bs7","write",0,"loops_per_min",1,"30:2:4"]=24,10 \
["HOST","ACTION","PARALLEL_WRITER","RESULT_TYPE","NO_RESOURCES","DURATION_OF_WRITE:DEVICE_SIZE:PART_OF_DEVICE_SIZE_WRITTEN_PER_LOOP:DIVISION_OF_DATA_AND_MARS_DEVICES"]=-1 \
["istore-test-bap7","write",0,"loops_per_min",1,"60:2:8:same_controller"]=28,10 \
["istore-test-bap7","write",0,"loops_per_min",1,"60:2:8:separated_and_mars_dev_with_bbu_cache"]=22,10 \
["istore-test-bap7","write",0,"loops_per_min",1,"60:2:8:separated_and_mars_dev_without_bbu_cache"]=10,10 \
["HOST","ACTION","PARALLEL_WRITER","RESULT_TYPE","NO_RESOURCES","NO_PATCHES:PATCH_LENGTH:SYNC_MODE"]=-1 \
["istore-test-bap7","sync",0,"time",1,"10:1:rsync"]=42,10 \
["istore-test-bap7","sync",0,"time",1,"10:50000:rsync"]=43,10 \
["istore-test-bap7","sync",0,"time",1,"4:500000:rsync"]=45,10 \
["istore-test-bap7","sync",0,"time",1,"2:1000000:rsync"]=49,10 \
["istore-test-bap7","sync",0,"time",1,"10:1:fast_sync"]=10,10 \
["istore-test-bap7","sync",0,"time",1,"10:50000:fast_sync"]=10,10 \
["istore-test-bap7","sync",0,"time",1,"4:500000:fast_sync"]=13,10 \
["istore-test-bap7","sync",0,"time",1,"2:1000000:fast_sync"]=18,10 \
["istore-test-bap7","sync",1,"time",1,"10:1:fast_sync"]=33,10 \
["istore-test-bs7","write_while_sync",0,"loops_per_min",1,"10:1:fast_sync"]=11,10 \
["istore-test-bap7","sync",1,"time",1,"10:50000:fast_sync"]=83,10 \
["istore-test-bs7","write_while_sync",0,"loops_per_min",1,"10:50000:fast_sync"]=12,10 \
["istore-test-bap7","sync",1,"time",1,"4:500000:fast_sync"]=60,10 \
["istore-test-bs7","write_while_sync",0,"loops_per_min",1,"4:500000:fast_sync"]=10,10 \
["istore-test-bap7","sync",1,"time",1,"2:1000000:fast_sync"]=56,10 \
["istore-test-bs7","write_while_sync",0,"loops_per_min",1,"2:1000000:fast_sync"]=10,10 \
["istore-test-bap7","sync",0,"time",1,"10:1:no_fast_sync"]=26,10 \
["istore-test-bap7","sync",0,"time",1,"10:50000:no_fast_sync"]=40,10 \
["istore-test-bap7","sync",0,"time",1,"4:500000:no_fast_sync"]=82,10 \
["istore-test-bap7","sync",0,"time",1,"2:1000000:no_fast_sync"]=31,10 \
["istore-test-bap7","sync",1,"time",1,"10:1:no_fast_sync"]=31,10 \
["istore-test-bs7","write_while_sync",0,"loops_per_min",1,"10:1:no_fast_sync"]=10,10 \
["istore-test-bap7","sync",1,"time",1,"10:50000:no_fast_sync"]=69,10 \
["istore-test-bs7","write_while_sync",0,"loops_per_min",1,"10:50000:no_fast_sync"]=10,10 \
["istore-test-bap7","sync",1,"time",1,"4:500000:no_fast_sync"]=71,10 \
["istore-test-bs7","write_while_sync",0,"loops_per_min",1,"4:500000:no_fast_sync"]=10,10 \
["istore-test-bap7","sync",1,"time",1,"2:1000000:no_fast_sync"]=78,10 \
["istore-test-bs7","write_while_sync",0,"loops_per_min",1,"2:1000000:no_fast_sync"]=10,10 \
)
## flag to check net throughput while waiting for fetch or sync to stop
perftest_check_net_throughput=1
## flag whether the performance influence of splitting /mars device and data
## device on different raid controllers is to be measured
perftest_division_mars_device_data_device=0
## after this number of checks whether fetch or sync have stopped, the net throughput is checked
perftest_check_net_throughput_intervall=2
## port on which nttcp tests the network connection
perftest_nttcp_port=12346
## command to start nttcp receiver
perftest_nttcp_start_cmd="nttcp -p $perftest_nttcp_port -r -i"
## type of result of tests
perftest_result_type="time"

View File

@ -12,9 +12,6 @@
##
## this module administrates the test resources
## number of resources
resource_count="6"
## the names of the resources used in the current test
resource_name_list=(lv-1-2)
@ -22,16 +19,30 @@ resource_name_list=(lv-1-2)
## maps resource name lv-<i>-<size> to /mnt/mars-test-<i>
declare -g -A resource_mount_point_list
eval resource_mount_point_list=($(for i in ${!resource_name_list[*]};do printf "[${resource_name_list[$i]}]=/mnt/mars-test-$(($i + 1)) ";done))
## resource dir of resource res is given by
## $main_mars_directory/resource-$res
declare -g -A resource_dir_list
eval resource_dir_list=($(for i in ${!resource_name_list[*]};do printf "[${resource_name_list[$i]}]=$main_mars_directory/resource-${resource_name_list[$i]} ";done))
## filesystem type of resource devices is set to xfs
declare -g -A resource_fs_type_list
eval resource_fs_type_list=($(for i in ${!resource_name_list[*]};do printf "[${resource_name_list[$i]}]=xfs ";done))
# (Re)computes the per-resource global lists from the current content of
# resource_name_list. Every list is indexed by resource name:
#   resource_mount_point_list  /mnt/mars-test-<array index + 1>
#   resource_dir_list          $main_mars_directory/resource-<resource name>
#   resource_fs_type_list      xfs for every resource
# Additionally the error patterns 0 and 1 of
# resource_mars_dir_full_err_pattern_list are rebuilt from the resource
# directory of the first resource.
# NOTE(review): the loop variable i is not declared local, so it is set in the
# scope of the caller - confirm that no caller depends on its own i.
function resource_set_globals
{
# the inner loops print words of the form [name]=value; eval is needed so that
# these generated words are parsed as a compound array assignment
eval resource_mount_point_list=($(for i in ${!resource_name_list[*]};do printf "[${resource_name_list[$i]}]=/mnt/mars-test-$(($i + 1)) ";done))
eval resource_dir_list=($(for i in ${!resource_name_list[*]};do printf "[${resource_name_list[$i]}]=$main_mars_directory/resource-${resource_name_list[$i]} ";done))
eval resource_fs_type_list=($(for i in ${!resource_name_list[*]};do printf "[${resource_name_list[$i]}]=xfs ";done))
## messages to be grepped for in abnormal situations
# both patterns embed the resource directory of the first resource, hence they
# must be set after resource_dir_list has been rebuilt above
resource_mars_dir_full_err_pattern_list[0]='DISK SPACE IS EXTREMELY LOW on '"${resource_dir_list[${resource_name_list[0]}]}"
resource_mars_dir_full_err_pattern_list[1]='EMERGENCY MODE on '"${resource_dir_list[${resource_name_list[0]}]}"'.*stopped transaction logging.*created a hole in the logfile sequence'
}
resource_set_globals
## mounting of the data device must fail on a secondary resource. Because
@ -75,9 +86,7 @@ resource_big_file=$main_mars_directory/mars_test_bigfile
declare -g -A resource_msgfile_list
resource_msgfile_list=([err]='3.error.status' [warn]='2.warn.status')
## messages to be grepped for in abnormal situations
resource_mars_dir_full_err_pattern_list[0]='DISK SPACE IS EXTREMELY LOW on '"${resource_dir_list[${resource_name_list[0]}]}"
resource_mars_dir_full_err_pattern_list[1]='EMERGENCY MODE on '"${resource_dir_list[${resource_name_list[0]}]}"'.*stopped transaction logging.*created a hole in the logfile sequence'
## file used to reset after emergency mode
resource_proc_sys_mars_reset_emergency_file="/proc/sys/mars/mars_reset_emergency"
resource_mars_dir_full_warn_pattern_list[0]='EMERGENCY: the space on '$main_mars_directory'/ is very low.'

View File

@ -1,66 +0,0 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
## defaults for module synctest
## list of lengths of the patch (in KB) the data device will be patched with
synctest_patch_length_list=(1 50000 500000 1000000)
## number of patches the data device will be patched with
## must correspond to synctest_patch_length_list
synctest_number_of_patches_list=(10 10 4 2)
## time for which the amount of data to sync must be constant
## to declare the sync process as inactive
synctest_time_constant_sync=20
## maxtime to wait for sync to stop
synctest_maxtime_sync=300
## sync modus
synctest_fast_sync=1
## proc file to switch sync modus
synctest_sync_mode_proc_file='/proc/sys/mars/do_fast_fullsync'
## use rsync instead of mars sync
synctest_use_mars_sync=1
## data file to use for rsync
synctest_data_file=$main_mars_directory/file_to_sync
## flag whether the data device should be written while syncing
synctest_parallel_writer=0
## time to run the data device writer to determine write rate
synctest_write_time=10
## time for which the amount of data to fetch must be constant to declare
## the fetch process as having stopped
synctest_time_constant_fetch=10
## maxtime to wait for fetch to stop (after pause-fetch)
synctest_maxtime_fetch=300
## maxtime to wait for apply to stop (after pause-apply)
synctest_maxtime_apply=100
## time for which the amount of data to apply must be constant to declare
## the apply process as having stopped
synctest_time_constant_apply=5

View File

@ -24,11 +24,29 @@ function execute_tests
fi
done
local error_ocurred=0
local msg
local grep_cmd='grep PERFORMANCE-FAILURE '$logfile
if [ ${#failed_tests[*]} -ne 0 ];then
local to msg
msg="tests failed on $(hostname) (Script $0):
${failed_tests[@]}
for details see $logfile on $(hostname)"
error_ocurred=1
local msg
msg="tests failed on $(hostname) (Script $0):"$'\n'
msg+=$(echo ${failed_tests[@]} | tr " " $'\n')$'\n'
fi
if $grep_cmd >/dev/null; then
error_ocurred=1
msg+=$'\n'"Performance-Failures:"$'\n'
msg+="$($grep_cmd)"$'\n'
fi
grep_cmd='grep ERROR-FILE '$logfile
if $grep_cmd >/dev/null; then
error_ocurred=1
msg+=$'\n'"Error-Files:"$'\n'
msg+="$($grep_cmd)"$'\n'
fi
if [ $error_ocurred -eq 1 ]; then
local to
msg+="for details see $logfile on $(hostname)"
for to in "${mail_to[@]}"; do
sendEmail -m "$msg" -f $mail_from -t $to -u "failed mars tests" -s $mail_server
done
@ -68,7 +86,7 @@ done
# main
echo Start $(basename $0) at $(date)
logfile="/home/fl/tmp/cronjob_mars"
logfile="/home/fl/tmp/cronjob_mars.log"
eval $(ssh-agent)
~/tools/sx
@ -95,6 +113,7 @@ test_cases/destroy_secondary_logfile \
test_cases/admin/resizing \
test_cases/admin/logrotate \
test_cases/admin/logdelete \
test_cases/bugs/memleak \
test_cases/perf \
test_cases/admin/switch2primary \
test_cases/admin/datadev_full \
@ -105,7 +124,7 @@ test_cases/stabil/crash/crash_primary \
test_cases/stabil/crash/crash_primary_logger_comletion_semantics__aio_sync_mode \
test_cases/stabil/crash/crash_primary_logger_completion_semantics \
test_cases/stabil/crash/crash_primary_aio_sync_mode \
test_cases/hardcore/aio_filehandle \
test_cases/bugs/aio_filehandle \
build_test_environment/resource/leave_resource \
)

View File

@ -106,7 +106,7 @@ function install_mars_update_bootloader_on_target_hosts
lib_remote_idfile $host lilo || lib_exit 1
install_mars_activate_kernel_to_boot_with_lilo $host $label_name
;;
*) lib_exit 1 "bootloader $boot_loader not supported"
*) echo "hint: for bootloader $boot_loader on $host no action defined"
;;
esac
done

View File

@ -12,29 +12,53 @@ function lv_config_name_matches_our_list
return 1
}
# Checks whether a logical volume exists on a host.
# Arguments: $1 - host on which lvdisplay is executed
#            $2 - name of the logical volume
# Returns:   the exit status of the remote lvdisplay call
function lv_exists
{
    local host=$1
    local lv_name=$2
    local dev
    # the device belonging to the lv name is determined by lv_config_get_lv_device
    dev=$(lv_config_get_lv_device $lv_name)
    lib_vmsg " checking whether lv $lv_name (dev=$dev) on $host exists"
    # last command of the function: its status is the status of lv_exists
    lib_remote_idfile $host "lvdisplay --noheadings -C $dev -o lv_name"
}
function lv_config_prepare
{
lv_config_check_variables
if (( $lv_config_delete_lvs_not_needed )); then
local host
for host in "${main_host_list[@]}"; do
local lv_name_list
lv_name_list=$(lib_remote_idfile $host \
lvdisplay $main_lvg_name -C --noheadings -o lv_name) \
|| lib_exit 1
local lv_name
for lv_name in ${lv_name_list[@]}; do
if lv_config_name_matches_our_list $lv_name; then
continue
else
lib_vmsg " deleting lv $main_lvg_name/$lv_name on $host"
lib_remote_idfile $host \
lvremove -f $main_lvg_name/$lv_name || lib_exit 1
fi
done
done
fi
resource_leave_all
cluster_umount_mars_dir_all
lv_config_delete_vg
}
# Removes all logical volumes and all physical volumes which belong to the
# volume group $main_lvg_name on every host of main_host_list.
# Globals: main_host_list (read), main_lvg_name (read)
# Exits:   via lib_exit 1 if a remote command fails. Only status 5 of the
#          lvdisplay call (volume group does not exist) is tolerated.
function lv_config_delete_vg
{
    local host out i rc
    for host in "${main_host_list[@]}"; do
        lib_vmsg " removing lvs in $main_lvg_name on $host"
        out="$(lib_remote_idfile "$host" lvdisplay -c "$main_lvg_name")"
        rc=$?
        # rc=5 means: vg does not exist
        if [[ $rc -ne 5 && $rc -ne 0 ]]; then
            lib_exit 1
        fi
        # The vg name is handed to awk with -v instead of being pasted into the
        # program text. Appending "" forces a string comparison, as the
        # original string literal did. Word splitting of the awk output yields
        # one device per iteration.
        for i in $(printf '%s\n' "$out" \
                   | awk -F':' -v vg="$main_lvg_name" \
                         '($2 "") == (vg "") {print $1}')
        do
            lib_vmsg "lvremove $host:$i"
            lib_remote_idfile "$host" lvremove -f "$i" || lib_exit 1
        done
        lib_vmsg " removing pvs in $main_lvg_name on $host"
        out="$(lib_remote_idfile "$host" pvdisplay -c)" || lib_exit 1
        # first rule: vg name in field 3, the pv name is rebuilt from the
        # fields 1 and 2; second rule: vg name in field 2, pv name is field 1
        for i in $(printf '%s\n' "$out" \
                   | awk -F':' -v vg="$main_lvg_name" '
                         ($3 "") == (vg "") {print $1 ":" $2}
                         ($2 "") == (vg "") {print $1}'
                  )
        do
            lib_vmsg " removing pvs $host:$i"
            lib_remote_idfile "$host" pvremove -ff -y "$i" || lib_exit 1
        done
    done
}
function lv_config_extract_int_from_lv_size
{
@ -44,29 +68,34 @@ function lv_config_extract_int_from_lv_size
function lv_config_check_volume_group_existence_and_size
{
local host lvg_size
local host lvg_size rc
for host in "${main_host_list[@]}"; do
lib_vmsg " checking volume group $main_lvg_name on $host"
lvg_size=$(lib_remote_idfile $host vgs --noheadings \
--units G -o vg_size $main_lvg_name) || lib_exit 1
--units G -o vg_size $main_lvg_name)
rc=$?
if [ $rc -ne 0 ];then
lib_vmsg " vg $host:$main_lvg_name will be created"
return
fi
# 11.1G -> 11
lvg_size=$(lv_config_extract_int_from_lv_size $lvg_size)
[ "$lvg_size" -ge $lv_config_min_lvg_size ] || \
lib_exit 1 "size $lvg_size of volume group $main_lvg_name not >= 100"
lib_exit 1 "size $lvg_size of volume group $main_lvg_name not >= $lv_config_min_lvg_size"
done
}
function lv_config_check_variables
{
if [ $resource_count -eq 0 ]; then
if [ ${#lv_config_name_list[*]} -eq 0 ]; then
lib_exit 1 "number of logical volumes to be created = 0"
fi
local lv_name sum
for lv_name in ${lv_config_name_list[@]}; do
let sum=$(($sum + $(lv_config_get_lv_size $lv_name)))
done
if [ $sum -gt $lv_config_min_lvg_size ];then
lib_exit 1 "sum of sizes in lv_config_name_list = $sum exceeds $lv_config_min_lvg_size"
if [ $sum -le $lv_config_min_lvg_size ];then
lib_exit 1 "sum of sizes in lv_config_name_list = $sum smaller than $lv_config_min_lvg_size"
fi
lib_check_access_to_remote_hosts "$main_ssh_idfile_opt" \
@ -103,14 +132,11 @@ function lv_config_get_size_logical_volume
echo $lv_size
}
# lv_config_delete_lvs_not_needed=1
#
# lvdisplay /dev/vg-mars/huhu
function lv_config_recreate_logical_volumes
{
local host lv_name lv_size lv_dev lv_size_act rc lv_must_be_recreated
local lv_size_tolerance lv_size_diff
lv_config_create_vg
for host in "${main_host_list[@]}"; do
for lv_name in "${lv_config_name_list[@]}"; do
lv_must_be_recreated=0
@ -132,15 +158,39 @@ function lv_config_recreate_logical_volumes
fi
fi
if (( lv_must_be_recreated )); then
lib_vmsg " creating lv $lv_name (size $lv_size G) on $host"
lib_remote_idfile $host \
lvcreate -L ${lv_size}G -n $lv_name $main_lvg_name \
|| lib_exit 1
lv_config_create_lv $host $lv_name
fi
done
done
}
# Creates one striped logical volume in the volume group $main_lvg_name.
# Arguments: $1 - host on which the logical volume is created
#            $2 - name of the logical volume; its size is determined by
#                 lv_config_get_lv_size
# Globals:   lv_config_partition_count_list (read, number of stripes per host),
#            lv_config_stripesize (read), main_lvg_name (read)
# Exits:     via lib_exit 1 if the size cannot be determined, if no stripe
#            count is defined for the host or if the remote lvcreate fails
function lv_config_create_lv
{
    local host=$1 lv_name=$2
    local size stripes
    # Assignment is separated from 'local': 'local size=$(...)' always
    # succeeds and would hide a failing size lookup.
    size=$(lv_config_get_lv_size "$lv_name") \
        || lib_exit 1 "cannot determine size of lv $lv_name"
    if [[ -z "$size" ]]; then
        lib_exit 1 "cannot determine size of lv $lv_name"
    fi
    # without a stripe count the lvcreate command line would be malformed
    stripes=${lv_config_partition_count_list[$host]}
    if [[ -z "$stripes" ]]; then
        lib_exit 1 "no partition count defined for host $host in lv_config_partition_count_list"
    fi
    lib_vmsg " creating lv $lv_name (size $size G) on $host"
    lib_remote_idfile "$host" \
        lvcreate -n "$lv_name" \
        -i "$stripes" \
        -I "$lv_config_stripesize" -L "${size}G" "$main_lvg_name" \
        || lib_exit 1
}
# Creates the volume group $main_lvg_name on every host of main_host_list and
# afterwards all logical volumes named in lv_config_name_list within it.
# Globals: main_host_list (read), main_lvg_name (read),
#          lv_config_partition_list (read, partitions per host),
#          lv_config_name_list (read)
# Exits:   via lib_exit 1 if the remote vgcreate fails
function lv_config_create_vg
{
    local host lv partitions
    for host in "${main_host_list[@]}"; do
        partitions="${lv_config_partition_list[$host]}"
        lib_vmsg " creating $main_lvg_name on $host (partitions=$partitions)"
        # $partitions is deliberately unquoted: every partition of the blank
        # separated list must become an argument of its own
        if ! lib_remote_idfile $host vgcreate $main_lvg_name $partitions; then
            lib_exit 1
        fi
        for lv in ${lv_config_name_list[@]}; do
            lv_config_create_lv $host $lv
        done
    done
}
function lv_config_get_lv_size
{
local lv_name=$1
@ -158,3 +208,15 @@ function lv_config_run
{
lv_config_recreate_logical_volumes
}
# Asks dmsetup on a host for the device mapper name of a device and lists the
# corresponding entry in /dev/mapper.
# Arguments: $1 - host, $2 - device handed to dmsetup info
# Outputs:   the output of the remote ls /dev/mapper/<name> call
# Returns:   the status of the dmsetup call if that call fails
# Exits:     via lib_exit 1 if the remote ls fails
function lv_config_get_dm_dev
{
    local host=$1
    local dev=$2
    local dm_dev rc
    dm_dev=$(lib_remote_idfile $host "dmsetup info -C --noheadings -o name $dev")
    rc=$?
    # hand the failure of dmsetup over to the caller without aborting
    (( rc == 0 )) || return $rc
    if ! lib_remote_idfile $host "ls /dev/mapper/$dm_dev"; then
        lib_exit 1
    fi
}

View File

@ -30,7 +30,7 @@ function cluster_umount_data_device_all
function cluster_rmmod_mars_all
{
local host
mount_umount_data_device
mount_umount_data_device_all
for host in "${main_host_list[@]}"; do
cluster_rmmod_mars $host
done
@ -46,6 +46,9 @@ function cluster_rmmod_mars
function cluster_clear_mars_dir_all
{
local host
if [ -z "$main_mars_directory" ]; then
lib_exit 1 " variable main_mars_directory empty"
fi
for host in "${main_host_list[@]}"; do
lib_vmsg " clearing directory $host:$main_mars_directory"
lib_remote_idfile $host "rm -rf $main_mars_directory/*" || lib_exit 1
@ -63,27 +66,48 @@ function cluster_insert_mars_module_all
function cluster_insert_mars_module
{
local host=$1
cluster_mount_mars_dir $host
cluster_create_debugfiles $host
lib_vmsg " inserting mars module on $host"
lib_remote_idfile $host 'grep -w "^mars" /proc/modules || modprobe mars' || lib_exit 1
}
# Unmounts $main_mars_directory on every host of main_host_list on which it
# is currently a mount point.
# Globals: main_host_list (read), main_mars_directory (read)
function cluster_umount_mars_dir_all
{
    local host
    for host in "${main_host_list[@]}"; do
        # nothing to do on hosts where the directory is no mount point
        mount_is_dir_mountpoint $host $main_mars_directory || continue
        mount_umount $host "device_does_not_matter" $main_mars_directory
    done
}
function cluster_mount_mars_dir
{
local host=$1 dev=$2
if mount_is_dir_mountpoint $host $main_mars_directory
then
mount_umount $host "device_does_not_matter" $main_mars_directory
local host=$1
local dev="$(lv_config_get_lv_device ${cluster_mars_dir_lv_name_list[$host]})"
local already_mounted_correctly=0
if mount_is_dir_mountpoint $host $main_mars_directory; then
local mount_point
if mount_is_device_mounted $host $dev "mount_point" \
&& [ "$mount_point" == "$main_mars_directory" ]
then
already_mounted_correctly=1
else
mount_umount $host "device_does_not_matter" $main_mars_directory
fi
fi
if [ $already_mounted_correctly -eq 0 ];then
lib_remote_check_device_fs $host $dev $main_mars_fs_type
mount_mount $host $dev $main_mars_directory $main_mars_fs_type || lib_exit 1
fi
mount_mount $host $dev $main_mars_directory $main_mars_fs_type || lib_exit 1
}
function cluster_mount_mars_dir_all
{
local host dev
for host in "${main_host_list[@]}"; do
dev="$(lv_config_get_lv_device ${cluster_mars_dir_lv_name_list[$host]})"
cluster_mount_mars_dir $host $dev
cluster_mount_mars_dir $host
done
}
@ -197,15 +221,15 @@ function cluster_check_devices_all
function cluster_create_debugfiles
{
local host=$1
lib_vmsg " creating debugfiles ${cluster_debugfiles[@]}"
lib_remote_idfile $host "touch ${cluster_debugfiles[@]}" || lib_exit 1
lib_vmsg " creating debugfile $cluster_debugfile"
lib_remote_idfile $host "touch $cluster_debugfile" || lib_exit 1
}
function cluster_remove_debugfiles
{
local host=$1
lib_vmsg " removing debugfiles ${cluster_debugfiles[@]}"
lib_remote_idfile $host "rm -f ${cluster_debugfiles[@]}" || lib_exit 1
lib_vmsg " removing debugfiles $cluster_debugfile"
lib_remote_idfile $host "rm -f $cluster_debugfile" || lib_exit 1
}
function cluster_check_variables

View File

@ -29,8 +29,8 @@ function resource_prepare
function resource_check_variables
{
if ! expr "${lv_config_name_list[*]}" : "${resource_name_list[*]}" \
>/dev/null
if ! expr "${lv_config_name_list[*]}" \
: "\(\(.* \)*${resource_name_list[*]}\( .*\)*\$\)" >/dev/null
then
lib_exit 1 "resource_name_list = '${resource_name_list[*]}' is no substring of '${lv_config_name_list[*]}' = lv_config_name_list"
fi
@ -68,7 +68,7 @@ function resource_leave
{
local host=$1 res=$2
if resource_is_data_device_mounted $host $res; then
local dev=$(resource_get_name_data_device $res)
local dev=$(resource_get_data_device $res)
mount_umount $host $dev ${resource_mount_point_list[$res]} || lib_exit 1
fi
@ -80,7 +80,7 @@ function resource_leave
done
resource_do_after_leave_loops $host $res
fi
resource_rm_resource_dir $res
resource_rm_resource_dir_all $res
}
function resource_do_after_leave_loops
@ -180,13 +180,13 @@ function resource_multi_res_run
:
}
function resource_fill_data_device
function resource_fill_mars_dir
{
local primary_host=${main_host_list[0]}
local secondary_host=${main_host_list[1]}
local res=${resource_name_list[0]}
local dev=$(lv_config_get_lv_device $res)
local data_dev=$(resource_get_name_data_device $res)
local data_dev=$(resource_get_data_device $res)
local data_dev_size=$(lv_config_get_lv_size $res)
local mars_lv_name=${cluster_mars_dir_lv_name_list[$primary_host]}
local mars_dev=$(lv_config_get_lv_device $mars_lv_name)
@ -214,11 +214,18 @@ function resource_fill_data_device
lib_remote_idfile $primary_host "rm -f $resource_big_file" || lib_exit 1
fi
resource_recreate_all
resource_check_proc_sys_mars_emergency_file $primary_host
resource_resize_mars_dir $primary_host $mars_dev $(($mars_dev_size + 10))
cluster_insert_mars_module $primary_host
echo TODO; return
marsadm_do_cmd $host "down" $res
marsadm_do_cmd $host "invalidate" $res
lib_wait_for_initial_end_of_sync $secondary_host $res \
$resource_maxtime_initial_sync \
$resource_time_constant_initial_sync \
"time_waited"
# TODO bischen was schreiben
lib_rw_compare_checksums $primary_host $secondary_host $dev 0 \
"primary_cksum" "secondary_cksum"
if [ $resource_use_data_dev_writes_to_fill_mars_dir -eq 1 ]; then
@ -229,6 +236,27 @@ function resource_fill_data_device
fi
}
# Resizes the mars device of a host and afterwards the filesystem on it.
# Arguments: $1 - host
#            $2 - mars device
#            $3 - new size (reported as GB in the log message)
# Exits:     via lib_exit 1 if the remote resize2fs fails
function resource_resize_mars_dir
{
    local host=$1
    local mars_dev=$2
    local new_size=$3
    lib_vmsg " resizing $host:$mars_dev to $new_size GB"
    # order matters: cluster_rmmod_mars first, then the device is resized,
    # finally the filesystem is adapted with resize2fs
    cluster_rmmod_mars $host
    lv_config_resize_device $host $mars_dev $new_size
    if ! lib_remote_idfile $host "resize2fs $mars_dev"; then
        lib_exit 1
    fi
}
# Verifies that the file $resource_proc_sys_mars_reset_emergency_file on a
# host contains the value 1.
# Arguments: $1 - host on which the file is read
# Globals:   resource_proc_sys_mars_reset_emergency_file (read)
# Exits:     via lib_exit 1 if the file cannot be read or if its content is
#            not the integer 1 (including empty or non-numeric content)
function resource_check_proc_sys_mars_emergency_file
{
    local host=$1 value
    lib_vmsg " checking value in $host:$resource_proc_sys_mars_reset_emergency_file"
    value=$(lib_remote_idfile "$host" \
        "cat $resource_proc_sys_mars_reset_emergency_file") || lib_exit 1
    # An empty or non-numeric value makes the numeric test itself fail with
    # status 2. With the former unquoted '-ne' test such a value passed the
    # check silently; negating the equality test treats it as a wrong value.
    if ! [ "$value" -eq 1 ] 2>/dev/null; then
        lib_exit 1 "wrong value $value (!= 1) in $host:$resource_proc_sys_mars_reset_emergency_file"
    fi
}
# compare actual data on data device with data written in
# resource_dd_until_mars_dir_full
function resource_check_data_on_data_device
@ -307,16 +335,26 @@ function resource_up
return $rc
}
function resource_rm_resource_dir
function resource_rm_resource_dir_all
{
local res=$1 host
local res_dir=${resource_dir_list[$res]}
if [ -z "$res_dir" ];then
lib_exit 1 " to resource $res no resource dir found in resource_dir_list"
fi
cluster_rmmod_mars_all
for host in "${main_host_list[@]}"; do
lib_vmsg " removing $host:$res_dir/*"
lib_remote_idfile $host "rm -rf $res_dir/*" || lib_exit 1
local mars_lv=${cluster_mars_dir_lv_name_list[$host]}
local mars_dev=$(lv_config_get_lv_device $mars_lv)
lib_vmsg " check whether mars device $host:$mars_dev exists"
if lib_remote_idfile $host "ls -l $mars_dev"; then
cluster_mount_mars_dir $host
lib_vmsg " removing $host:$res_dir"
lib_remote_idfile $host "rm -rf $res_dir" || lib_exit 1
lib_remote_idfile $host "mkdir $res_dir" || lib_exit 1
fi
done
}
@ -372,7 +410,7 @@ function resource_check_data_link
lib_linktree_check_link $host "$link" $dev
}
function resource_get_name_data_device
function resource_get_data_device
{
local res=$1
echo /dev/mars/$res
@ -381,14 +419,15 @@ function resource_get_name_data_device
function resource_is_data_device_mounted
{
local host=$1 res=$2 rc
local dev=$(resource_get_name_data_device $res)
mount_is_device_mounted $host $dev
local dev=$(resource_get_data_device $res)
local mount_point
mount_is_device_mounted $host $dev "mount_point"
}
function resource_check_data_device_after_create
{
local host=$1 res=$2
local dev=$(resource_get_name_data_device $res)
local dev=$(resource_get_data_device $res)
local waited=0 rc
while true; do
lib_vmsg " checking existence of device $dev on $host"
@ -409,12 +448,13 @@ function resource_check_data_device_after_create
function resource_check_mount_and_rmmod_possibilities
{
local host=$1 res=$2
local data_dev=$(resource_get_name_data_device $res)
local data_dev=$(resource_get_data_device $res)
local mount_point
resource_check_mount_point_directories $host
if ! mount_is_device_mounted $host $data_dev ${resource_mount_point_list[$res]}
if ! mount_is_device_mounted $host $data_dev "mount_point"
then
mount_mount $host $data_dev ${resource_mount_point_list[$res]} \
${resource_fs_type_list[$res]}
${resource_fs_type_list[$res]} || lib_exit 1
fi
resource_check_whether_rmmod_mars_fails $host $data_dev
mount_umount $host $data_dev ${resource_mount_point_list[$res]} || \

View File

@ -0,0 +1,74 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
# Preparation for the multi resource test: runs the steps listed below
# strictly in the given order. The last step, multires_create_resources_all,
# creates the resources.
function multires_prepare
{
    local multires_prepare_step
    for multires_prepare_step in \
            cluster_umount_mars_dir_all \
            lv_config_prepare \
            lv_config_run \
            cluster_mount_mars_dir_all \
            resource_rm_resource_dir_all \
            cluster_create \
            cluster_join \
            cluster_insert_mars_module_all \
            multires_create_resources_all
    do
        $multires_prepare_step
    done
}
# Create every resource named in lv_config_name_list on the first host of
# main_host_list (primary), join it on the second host (secondary) and wait
# until marsview_check reports "-SFA-" / "Uptodate" for the secondary.
#
# While waiting:
#  - if the secondary shows "-S..-" / "Inconsistent", the sync is
#    short-circuited with pause-sync, fake-sync and resume-sync
#  - if the secondary disk shows "Detached", the join is repeated
# Terminates via lib_exit if a resource needs more than maxwait polling
# rounds (one second sleep each).
function multires_create_resources_all
{
    local primary_host=${main_host_list[0]}
    local secondary_host=${main_host_list[1]}
    local res lv_dev count maxwait=20
    for res in "${lv_config_name_list[@]}"; do
        lv_dev=$(lv_config_get_lv_device $res)
        # each resource gets its own wait budget; a counter shared by all
        # resources would make later resources fail for no reason of their own
        count=0
        marsadm_do_cmd $primary_host "create-resource --force" "$res $lv_dev"
        marsadm_do_cmd $primary_host "wait-resource" "$res is-device"
        lib_remote_idfile $primary_host marsview $res
        marsadm_do_cmd $secondary_host "join-resource --force" "$res $lv_dev"
        lib_remote_idfile $secondary_host marsview $res
        while true; do
            if marsview_check $secondary_host $res "repl" "-S..-" \
               && marsview_check $secondary_host $res "disk" "Inconsistent"
            then
                marsadm_do_cmd $secondary_host "pause-sync" $res
                marsadm_do_cmd $secondary_host "fake-sync" $res
                marsadm_do_cmd $secondary_host "resume-sync" $res
                # deliberately no "continue": falling through keeps this
                # path paced by the sleep and bounded by maxwait, instead of
                # spinning forever if the state does not change
            fi
            if marsview_check $secondary_host $res "repl" "-SFA-" \
               && marsview_check $secondary_host $res "disk" "Uptodate"
            then
                break
            fi
            sleep 1
            count=$((count + 1))
            if [ $count -ge $maxwait ]; then
                lib_exit 1 "maxwait $maxwait exceeded"
            fi
            lib_vmsg " waited $count for secondary to become up to date"
            if marsview_check $secondary_host $res "disk" "Detached"; then
                lib_vmsg " try to join $res again"
                # same argument shape as the first join above: resource and
                # device travel together as the third argument
                marsadm_do_cmd $secondary_host "join-resource --force" \
                               "$res $lv_dev"
            fi
        done
    done
}

View File

@ -62,11 +62,11 @@ function remote_dev_run
lib_rw_stop_writing_data_device $writer_script "write_count"
local lilo_label_name="${main_host_bootloader_label_list[$secondary_host]}"
local boot_label_name="${main_host_bootloader_label_list[$secondary_host]}"
local mars_dev=$(lv_config_get_lv_device ${cluster_mars_dir_lv_name_list[$secondary_host]})
crash_reboot $secondary_host "" $mars_dev $crash_maxtime_reboot \
$crash_maxtime_to_become_unreachable \
$lilo_label_name
$boot_label_name
remote_dev_remove_magic_links $primary_host $secondary_host
}
@ -75,7 +75,7 @@ function remote_dev_remove_magic_links
local hosts="$@" host
# to guarantee persistence of removal on all hosts
mount_umount_data_device
mount_umount_data_device_all
cluster_rmmod_mars_all
for host in $hosts; do

View File

@ -97,7 +97,7 @@ function resize_do_resize
[ $# -eq 6 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local primary_host=$1 secondary_host=$2 res=$3 dev=$4 data_dev_size_new=$5
local mars_data_dev_size_new=$6
local host time_waited
local host time_waited net_throughput
for host in $primary_host $secondary_host; do
lv_config_resize_device $host $dev $data_dev_size_new
@ -120,7 +120,8 @@ function resize_do_resize
lib_wait_until_action_stops "syncstatus" $secondary_host $res \
$resize_maxtime_sync \
$resize_time_constant_sync "time_waited"
$resize_time_constant_sync "time_waited" 0 \
"net_throughput"
lib_vmsg " ${FUNCNAME[0]}: sync time: $time_waited"
}
@ -132,6 +133,7 @@ function resize_resize_to_orig_size
resource_leave_all
for host in $primary_host $secondary_host; do
lv_config_resize_device $host $dev $data_dev_size_orig
lib_remote_check_device_fs $host $dev ${resource_fs_type_list[$res]}
done
}

View File

@ -52,7 +52,7 @@ function switch2primary_run
count=0
while true; do
mount_umount_data_device
mount_umount_data_device_all
rc=$?
if [ $rc -ne 0 ]; then
let count+=1

View File

@ -25,10 +25,11 @@ function crash_run
local primary_host=${main_host_list[0]}
local secondary_host=${main_host_list[1]}
local mars_dev=$(lv_config_get_lv_device ${cluster_mars_dir_lv_name_list[$primary_host]})
local lilo_label_name="${main_host_bootloader_label_list[$primary_host]}"
local boot_label_name="${main_host_bootloader_label_list[$primary_host]}"
local res=${resource_name_list[0]}
local dev=$(lv_config_get_lv_device $res)
local writer_pid writer_script logfile length_logfile time_waited
local net_throughput
local waited=0 error_ocurred=0
mount_mount_data_device
@ -46,7 +47,7 @@ function crash_run
$crash_aio_sync_mode
crash_reboot $primary_host $secondary_host $mars_dev $crash_maxtime_reboot \
$crash_maxtime_to_become_unreachable \
$lilo_label_name
$boot_label_name
lib_linktree_print_linktree $primary_host
@ -60,12 +61,14 @@ function crash_run
$crash_maxtime_state_constant
lib_wait_until_action_stops "syncstatus" $secondary_host $res \
$crash_maxtime_sync \
$crash_time_constant_sync "time_waited"
$crash_time_constant_sync "time_waited" 0 \
"net_throughput"
lib_vmsg " ${FUNCNAME[0]}: sync time: $time_waited"
lib_wait_until_fetch_stops "crash" $secondary_host $primary_host $res \
"logfile" "length_logfile" "time_waited"
"logfile" "length_logfile" "time_waited" 0 \
"net_throughput"
lib_vmsg " ${FUNCNAME[0]}: fetch time: $time_waited"
@ -91,13 +94,14 @@ function crash_run
function crash_write_data_device_and_calculate_checksums
{
local primary_host=$1 secondary_host=$2 res=$3 dev=$4
local writer_pid writer_script write_count time_waited
local writer_pid writer_script write_count time_waited net_throughput
mount_mount_data_device
lib_rw_start_writing_data_device "writer_pid" "writer_script" 0 0 $res
lib_rw_stop_writing_data_device $writer_script "write_count"
lib_wait_until_action_stops "replay" $secondary_host $res \
$crash_maxtime_sync \
$crash_time_constant_sync "time_waited"
$crash_maxtime_apply \
$crash_time_constant_apply "time_waited" 0 \
"net_throughput"
lib_vmsg " ${FUNCNAME[0]}: apply time: $time_waited"
@ -113,7 +117,7 @@ function crash_reboot
[ $# -eq 6 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local primary_host=$1 secondary_host=$2 mars_dev=$3 maxtime_to_reboot=$4
local maxtime_to_become_unreachable=$5
local lilo_label_name=$6
local boot_label_name=$6
local pids_to_kill host
if [ -z "$crash_print_linktree_during_reboot" ]; then
@ -123,8 +127,10 @@ function crash_reboot
then
lib_exit 1 "to print symlink trees secondary_host must be given"
fi
install_mars_activate_kernel_to_boot_with_lilo $primary_host \
$lilo_label_name
if [ "${main_host_bootloader_list[$primary_host]}" = "lilo" ]; then
install_mars_activate_kernel_to_boot_with_lilo $primary_host \
$boot_label_name
fi
main_error_recovery_functions["lib_rw_stop_scripts"]=

View File

@ -26,7 +26,7 @@ function file_destroy_run
local secondary_host="${main_host_list[1]}"
local res=${resource_name_list[0]}
local logfile length_logfile writer_pid writer_script write_count
local time_waited
local time_waited net_throughput
lib_wait_for_initial_end_of_sync $secondary_host $res \
$resource_maxtime_initial_sync \
@ -48,7 +48,8 @@ function file_destroy_run
lib_rw_stop_writing_data_device $writer_script "write_count"
lib_wait_until_fetch_stops "file_destroy" $secondary_host $primary_host \
$res "logfile" "length_logfile" "time_waited"
$res "logfile" "length_logfile" "time_waited" 0 \
"net_throughput"
lib_vmsg " ${FUNCNAME[0]}: fetch time: $time_waited"
@ -62,7 +63,7 @@ function file_destroy_run
lib_wait_until_action_stops "replay" $secondary_host $res \
$file_destroy_maxtime_apply \
$file_destroy_time_constant_apply \
"time_waited"
"time_waited" 0 "net_throughput"
lib_vmsg " ${FUNCNAME[0]}: apply time: $time_waited"
marsview_check $secondary_host $res "disk" "Outdated\[.*A.*\]" || lib_exit 1

View File

@ -17,12 +17,58 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
function lib_wait_until_fetch_stops
# Poll the length of a logfile on a host once per second until it has
# reached a required length.
#   $1 host                    host on which the logfile is inspected
#   $2 logfile                 name of the logfile
#   $3 length_logfile          required minimal length
#   $4 varname_time_waited     name of the variable receiving the seconds
#                              elapsed between start and success
#   $5 maxwait                 number of polling rounds after which lib_exit
#                              is called
#   $6 check_net_throughput    1 = measure the network throughput every
#                              $perftest_check_net_throughput_intervall rounds
#   $7 varname_net_throughput  name of the variable receiving the average of
#                              the measured rates (0 if none was measured);
#                              only written if $6 is 1
function lib_wait_until_logfile_has_length
{
    [ $# -eq 7 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local host=$1 logfile=$2 length_logfile=$3
    local varname_time_waited=$4 maxwait=$5 check_net_throughput=$6 varname_net_throughput=$7
    local act_length
    local waited=0 start_time=$(date +'%s') end_time
    local my_net_throughput=0 net_throughput_sum=0 net_check_count=0

    lib_vmsg " waiting for $host:$logfile to grow to $length_logfile"
    while true; do
        lib_vmsg " get length of $host:$logfile"
        # the file is looked up on the host passed as $1 - the same host
        # that all messages of this function refer to
        act_length=$(file_handling_get_file_length $host $logfile) \
                                                            || lib_exit 1
        if [ $act_length -ge $length_logfile ]; then
            end_time=$(date +'%s')
            break
        fi
        sleep 1
        waited=$((waited + 1))
        lib_vmsg " waited $waited for $logfile act = $act_length, req = $length_logfile"
        if [ $waited -eq $maxwait ]; then
            lib_exit 1 "maxwait $maxwait exceeded"
        fi
        if [ $check_net_throughput -eq 1 ]; then
            if [ $(( $waited % $perftest_check_net_throughput_intervall )) -eq 0 ]; then
                # NOTE(review): primary_host and secondary_host are not
                # parameters of this function; they are resolved from the
                # caller's scope - confirm that every caller defines them
                perftest_check_tcp_connection $primary_host $secondary_host \
                                              "my_net_throughput"
                net_throughput_sum=$((net_throughput_sum + my_net_throughput))
                net_check_count=$((net_check_count + 1))
            fi
        fi
    done
    printf -v "$varname_time_waited" '%s' "$((end_time - start_time))"
    if [ $check_net_throughput -eq 1 ]; then
        local rate
        if [ $net_check_count -eq 0 ]; then
            rate=0
        else
            rate=$((net_throughput_sum / net_check_count))
        fi
        printf -v "$varname_net_throughput" '%s' "$rate"
    fi
}
function lib_wait_until_fetch_stops
{
[ $# -eq 9 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local module=$1 secondary_host=$2 primary_host=$3 res=$4
local varname_logfile=$5 varname_length_logfile=$6
local varname_time_waited=$7
local varname_time_waited=$7 check_net_throughput=$8 varname_net_throughput=$9
local maxtime_fetch time_constant_fetch var v
for var in maxtime_fetch time_constant_fetch; do
@ -37,17 +83,20 @@ function lib_wait_until_fetch_stops
$maxtime_fetch \
$time_constant_fetch \
$varname_logfile $varname_length_logfile \
$varname_time_waited
$varname_time_waited \
$check_net_throughput $varname_net_throughput
}
function lib_wait_internal_until_fetch_stops
{
[ $# -eq 8 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
[ $# -eq 10 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local secondary_host=$1 res=$2 primary_host=$3 maxwait=$4 inactive_wait=$5
local varname_logfile=$6 varname_logfile_length=$7 varname_time_waited=$8
local check_net_throughput=$9 varname_net_throughput=${10}
local inactive_waited=0 msg
local my_logfile length file_and_length file_and_length_old="x"
local waited=0 msg start_time=$(date +'%s') end_time
local my_net_throughput=0 net_throughput_sum=0 net_check_count=0
while [ $waited -lt $maxwait ]; do
my_logfile=$(marsadm_get_last_logfile $secondary_host $res \
$primary_host) || lib_exit 1
@ -70,11 +119,29 @@ function lib_wait_internal_until_fetch_stops
let waited+=1
msg=" waited $waited for $my_logfile act = $file_and_length, old = $file_and_length_old"
lib_vmsg "$msg"
if [ $check_net_throughput -eq 1 ]; then
if [ $(( $waited % $perftest_check_net_throughput_intervall )) -eq 0 ]; then
perftest_check_tcp_connection $primary_host $secondary_host \
"my_net_throughput"
let net_throughput_sum+=$my_net_throughput
let net_check_count+=1
fi
fi
file_and_length_old="$file_and_length"
done
if [ $waited -eq $maxwait ]; then
lib_exit 1 "$msg"
fi
if [ $check_net_throughput -eq 1 ]; then
local rate
if [ $net_check_count -eq 0 ]; then
rate=0
else
rate=$(($net_throughput_sum / $net_check_count))
fi
eval $varname_net_throughput=$rate
fi
eval $varname_logfile_length=$length
eval $varname_logfile=$my_logfile
eval $varname_time_waited=$(($end_time - $start_time))
@ -85,12 +152,14 @@ function lib_wait_internal_until_fetch_stops
# the time waited is returned in the variable whose name is given by $6
function lib_wait_until_action_stops
{
[ $# -eq 6 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
[ $# -eq 8 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local action=$1 host=$2 res=$3 maxwait=$4 inactive_wait=$5
local varname_time_waited=$6
local varname_time_waited=$6 check_net_throughput=$7
local varname_net_throughput=$8
local waited=0 link_value link_value_old="x"
local inactive_waited=0 msg start_time=$(date +'%s') end_time
local link=$(lib_linktree_get_res_host_linkname $host $res $action)
local my_net_throughput=0 net_throughput_sum=0 net_check_count=0
while [ $waited -lt $maxwait ]; do
link_value=$(lib_remote_idfile $host readlink $link) || \
lib_exit 1 "cannot read link $link"
@ -109,12 +178,29 @@ function lib_wait_until_action_stops
let waited+=1
msg=" waited $waited for $action to stop. old = $link_value_old, act = $link_value"
lib_vmsg "$msg"
if [ $check_net_throughput -eq 1 ]; then
if [ $(( $waited % $check_net_throughput )) -eq 0 ]; then
perftest_check_tcp_connection $primary_host $secondary_host \
"my_net_throughput"
let net_throughput_sum+=$my_net_throughput
let net_check_count+=1
fi
fi
link_value_old="$link_value"
done
if [ $waited -eq $maxwait ]; then
lib_exit 1 "$msg"
fi
eval $varname_time_waited=$(($end_time - $start_time))
if [ $check_net_throughput -eq 1 ]; then
local rate
if [ $net_check_count -eq 0 ]; then
rate=0
else
rate=$(($net_throughput_sum / $net_check_count))
fi
eval $varname_net_throughput=$rate
fi
}
function lib_wait_for_initial_end_of_sync
@ -122,8 +208,10 @@ function lib_wait_for_initial_end_of_sync
[ $# -eq 5 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local secondary_host=$1 res=$2 maxwait=$3 inactive_wait=$4
local varname_time_waited=$5
local net_throughput
lib_wait_until_action_stops "syncstatus" $secondary_host $res $maxwait \
$inactive_wait $varname_time_waited
$inactive_wait $varname_time_waited 0 \
"net_throughput"
# after sync disk state must be Outdated || Uptodate
marsview_check $secondary_host $res "disk" ".*date.*" || lib_exit 1
}
@ -134,6 +222,7 @@ function lib_wait_for_secondary_to_become_uptodate
local module_name=$1 secondary_host=$2 primary_host=$3 res=$4
local dev=$5 dev_size_to_compare=$6
local host role logfile length_logfile time_waited write_count
local net_throughput mount_point
local maxtime_apply time_constant_apply str var
for str in "maxtime" "time_constant"; do
@ -145,15 +234,17 @@ function lib_wait_for_secondary_to_become_uptodate
done
lib_wait_until_fetch_stops $module_name $secondary_host $primary_host $res \
"logfile" "length_logfile" "time_waited"
"logfile" "length_logfile" "time_waited" 0 \
"net_throughput"
lib_vmsg " ${FUNCNAME[0]} called from ${FUNCNAME[1]}: fetch time: $time_waited"
file_handling_check_equality_of_file_lengths $logfile $primary_host \
$secondary_host $length_logfile
lib_wait_until_action_stops "replay" $secondary_host $res \
$maxtime_apply \
$time_constant_apply "time_waited"
$maxtime_apply \
$time_constant_apply "time_waited" 0 \
"net_throughput"
lib_vmsg " ${FUNCNAME[0]} called from ${FUNCNAME[1]}: apply time: $time_waited"
@ -163,10 +254,38 @@ function lib_wait_for_secondary_to_become_uptodate
marsview_check $host $res "repl" "-SFA-" || lib_exit 1
done
if mount_is_device_mounted $primary_host $dev; then
mount_umount $primary_host $dev ${resource_mount_point_list[$res]}
if mount_is_device_mounted $primary_host $dev "mount_point"; then
mount_umount $primary_host $dev $mount_point
fi
lib_rw_compare_checksums $primary_host $secondary_host $dev \
$dev_size_to_compare "" ""
}
# Poll the "replay" link of a resource on the secondary once per second
# until the applied length stored in it has reached a required value.
#   $1 secondary_host      host whose replay link is read
#   $2 res                 resource name
#   $3 logfile             logfile name (used in messages only)
#   $4 req_applied_length  required applied length
#   $5 maxwait             number of polling rounds after which lib_exit is
#                          called
# The link value is split at commas; the second field is taken as the
# applied length.
function lib_wait_until_apply_has_reached_length
{
    [ $# -eq 5 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local secondary_host=$1 res=$2 logfile=$3 req_applied_length=$4 maxwait=$5
    local link=$(lib_linktree_get_res_host_linkname $secondary_host $res "replay")
    local link_value waited=0 act_applied_length
    local -a link_fields
    while true; do
        lib_vmsg " get applied length of $secondary_host:$logfile"
        link_value=$(lib_remote_idfile $secondary_host readlink $link) \
                                    || lib_exit 1 "cannot read link $link"
        # keep the raw value for diagnostics and split a copy into fields
        link_fields=(${link_value//,/ })
        act_applied_length=${link_fields[1]}
        if ! [[ "$act_applied_length" =~ ^[0-9]+$ ]]; then
            # report the value that could not be parsed, not the link name
            lib_exit 1 "cannot determine applied length from link_value $link_value"
        fi
        if [ $act_applied_length -ge $req_applied_length ]; then
            break
        fi
        sleep 1
        waited=$((waited + 1))
        lib_vmsg " waited $waited for apply of $logfile act = $act_applied_length, req = $req_applied_length"
        if [ $waited -eq $maxwait ]; then
            lib_exit 1 "maxwait $maxwait exceeded"
        fi
    done
}

View File

@ -0,0 +1,743 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Validate the global variable perftest_action: it must be set and must be
# one of apply, fetch, write, sync or fetch_and_apply. Any other value ends
# the test via lib_exit.
function perftest_check_variables
{
    local valid_action

    if [ -z "$perftest_action" ]; then
        lib_exit 1 "no action defined"
    fi
    for valid_action in apply fetch write sync fetch_and_apply; do
        if [ "$perftest_action" = "$valid_action" ]; then
            return 0
        fi
    done
    lib_exit 1 "invalid action $perftest_action"
}
# Entry point of the perftest module. Depending on the global variable
# perftest_action the functions perftest_prepare_<action> and
# perftest_do_<action> are called with
#   primary host, secondary host, first resource, perftest_parallel_writer,
#   perftest_result_type, number of resources
# For the actions fetch, sync and fetch_and_apply the nttcp receiver is
# started and checked between the two calls. perftest_finish is always
# called at the end.
function perftest_run
{
    local primary_host=${main_host_list[0]}
    local secondary_host=${main_host_list[1]}
    local res=${resource_name_list[0]}

    perftest_check_variables

    perftest_prepare_${perftest_action} \
                    $primary_host $secondary_host $res \
                    $perftest_parallel_writer $perftest_result_type \
                    ${#resource_name_list[*]}

    if [ "$perftest_action" = "fetch" ] \
       || [ "$perftest_action" = "sync" ] \
       || [ "$perftest_action" = "fetch_and_apply" ]
    then
        perftest_start_and_check_nttcp $primary_host $secondary_host
    fi

    perftest_do_${perftest_action} \
                    $primary_host $secondary_host $res \
                    $perftest_parallel_writer $perftest_result_type \
                    ${#resource_name_list[*]}

    perftest_finish $secondary_host
}
# Final step of a perftest run: stops nttcp on the secondary host via
# perftest_stop_nttcp.
# NOTE(review): perftest_run passes the secondary host as $1, but the
# argument is not read here; both hosts are taken from main_host_list
# instead, and primary_host is not used within this function.
function perftest_finish
{
local primary_host=${main_host_list[0]}
local secondary_host=${main_host_list[1]}
perftest_stop_nttcp $secondary_host
}
# Stop the nttcp process started with $perftest_nttcp_start_cmd on the
# secondary host.
#   $1 secondary_host  host on which nttcp is searched (pgrep -f) and killed
# Two rounds are made: in round "kill" a running process gets signal 1
# (kill -1), in round "check" it is verified that the process is gone.
# The outcome is only logged via lib_vmsg; a surviving process does not
# terminate the test.
function perftest_stop_nttcp
{
    local secondary_host=$1
    # loop variable must be local, otherwise a caller's "i" is overwritten
    local i
    for i in "kill" "check"; do
        if lib_remote_idfile $secondary_host \
                    'pgrep -f "'"$perftest_nttcp_start_cmd"'"'
        then
            if [ "$i" = "kill" ]; then
                lib_vmsg " trying to kill nttcp on $secondary_host"
                lib_remote_idfile $secondary_host \
                    'kill -1 $(pgrep -f "'"$perftest_nttcp_start_cmd"'")'
            else
                lib_vmsg " could not kill nttcp on $secondary_host"
                break
            fi
        else
            if [ "$i" = "kill" ]; then
                lib_vmsg " no process $perftest_nttcp_start_cmd running"
                break
            else
                lib_vmsg " killed process $perftest_nttcp_start_cmd"
            fi
        fi
        sleep 1
    done
}
# Make sure a nttcp receiver ($perftest_nttcp_start_cmd) is running on the
# secondary host and check the tcp connection between both hosts.
#   $1 primary_host    host from which the connection is checked
#   $2 secondary_host  host on which the receiver is started
# Terminates via lib_exit if perftest_check_net_throughput is unset or 0 or
# if nttcp is not installed on one of the hosts.
# NOTE(review): if the receiver cannot be started this is only logged and
# the function returns without checking the connection - confirm that this
# is intended.
function perftest_start_and_check_nttcp
{
    local primary_host=$1 secondary_host=$2
    local net_throughput
    local host i
    if [ ${perftest_check_net_throughput:-0} -eq 0 ]; then
        lib_exit 1 "perftest_check_net_throughput not set or 0"
    fi
    for host in $primary_host $secondary_host; do
        lib_vmsg " checking whether nttcp is installed on $host"
        lib_remote_idfile $host "type nttcp" || \
                                    lib_exit 1 "nttcp not installed on $host"
    done
    for i in "start" "check"; do
        # the start command is kept inside the quotes, so that the whole
        # pgrep call reaches lib_remote_idfile as one single argument (as in
        # perftest_stop_nttcp)
        if ! lib_remote_idfile $secondary_host \
                                "pgrep -f '$perftest_nttcp_start_cmd'"
        then
            if [ "$i" = "start" ]; then
                lib_vmsg " starting nttcp receiver on $secondary_host"
                lib_remote_idfile $secondary_host "$perftest_nttcp_start_cmd" &
            else
                lib_vmsg " could not start receiver on $secondary_host"
                return
            fi
        else
            if [ "$i" = "start" ]; then
                lib_vmsg " nttcp $perftest_nttcp_start_cmd already running on $secondary_host"
                break
            else
                lib_vmsg " nttcp $perftest_nttcp_start_cmd started on $secondary_host"
            fi
        fi
        sleep 1
    done
    perftest_check_tcp_connection $primary_host $secondary_host "net_throughput"
}
# Measure the tcp throughput from the primary to the secondary host with
# nttcp and return the integer part of the measured MBit/s.
#   $1 primary_host        host on which nttcp is called
#   $2 secondary_host      target host of nttcp (-T)
#   $3 varname_throughput  name of the variable receiving the result
# The complete nttcp output is echoed to stdout.
# Terminates via lib_exit if perftest_check_net_throughput is unset or 0 or
# if no numeric rate can be found in the nttcp output.
function perftest_check_tcp_connection
{
    [ $# -eq 3 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local primary_host=$1 secondary_host=$2 varname_throughput=$3
    local cmd="nttcp -n16384 -f %9b%8.2rt%8.2ct%15.4rbr%15.4cbr%8c%10.2rcr%10.2ccr -p $perftest_nttcp_port -T $secondary_host"
    local nttcp_out mbit_per_second
    if [ ${perftest_check_net_throughput:-0} -eq 0 ]; then
        lib_exit 1 "perftest_check_net_throughput not set or 0"
    fi
    lib_vmsg " checking tcp via nttcp on $primary_host"
    nttcp_out=($(lib_remote_idfile $primary_host "$cmd"))
    echo "${nttcp_out[*]}"
    # nttcp_out looks like
    #      Bytes  Real s   CPU s Real-MBit/s  CPU-MBit/s   Calls  Real-C/s   CPU-C/s
    # l 67108864    0.55    0.03    971.1935  17895.6971   16384  29638.47 546133.33
    # 1 67108864    0.62    0.09    862.4709   5965.2324   20383  32744.83 226477.78
    # and we need the 971 Mbit/s, which is word 14 (counted from 0) of the
    # output split at whitespace
    mbit_per_second=${nttcp_out[14]}
    # an empty or garbled rate must not be handed to the callers, which use
    # the value in arithmetic
    if ! [[ "$mbit_per_second" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
        lib_exit 1 "cannot determine throughput from nttcp output: ${nttcp_out[*]}"
    fi
    printf -v "$varname_throughput" '%s' "${mbit_per_second%.*}"
}
# Remove a data file on a list of hosts.
#   $1    data_file  file to remove (rm -f)
#   $2..  hosts      hosts on which the file is removed
# Terminates via lib_exit if the removal fails on a host.
function perftest_remove_data_file
{
    local data_file=$1
    shift
    local hosts="$@" host

    for host in $hosts; do
        lib_vmsg " removing $host:$data_file"
        if ! lib_remote_idfile $host "rm -f $data_file"; then
            lib_exit 1
        fi
    done
}
# Action "write": let the writer script write to the data device of the
# resource for $perftest_write_time seconds, compute the write rate per
# minute from the write count reported when the writer is stopped, and
# hand the rate to perftest_check_result.
#   $1 primary_host  $2 secondary_host  $3 res
#   $4 parallel_writer  $5 result_type  $6 no_resources
function perftest_do_write
{
    [ $# -eq 6 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    lib_vmsg " executing ${FUNCNAME[0]}"
    local primary_host=$1 secondary_host=$2 res=$3
    local parallel_writer=$4 result_type=$5 no_resources=$6
    local writer_pid writer_script write_count writer_start
    local writer_rate writer_end

    writer_start=$(date +'%s')
    lib_rw_start_writing_data_device "writer_pid" "writer_script" 0 0 $res

    lib_vmsg " sleep $perftest_write_time"
    sleep $perftest_write_time

    lib_rw_stop_writing_data_device $writer_script "write_count"
    writer_end=$(date +'%s')
    writer_rate=$(perftest_get_rate_per_minute $writer_start $writer_end \
                                               $write_count)

    # the writer is stopped, so its recovery entry is cleared
    main_error_recovery_functions["lib_rw_stop_scripts"]=

    mount_umount_data_device_all
    lib_vmsg " ${FUNCNAME[0]}: do_write rate: $writer_rate"

    perftest_check_result $writer_rate $primary_host "write" \
                          $parallel_writer $result_type $no_resources \
                          $(perftest_get_write_subcase_id) -1
}
# Print the rate per minute for a number of events counted between two
# points in time.
#   $1 start  start time in seconds
#   $2 end    end time in seconds
#   $3 count  number of events
# An interval shorter than one second is treated as one second, because
# the time stamps have a resolution of one second and start == end would
# lead to a division by zero.
function perftest_get_rate_per_minute
{
    [ $# -eq 3 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local start=$1 end=$2 count=$3
    local elapsed=$((end - start))
    if [ $elapsed -lt 1 ]; then
        elapsed=1
    fi
    echo $(( (60 * count) / elapsed ))
}
# Copy a data file from the primary to the secondary host via
# perftest_do_rsync and return the seconds needed.
#   $1 secondary_host       host on which rsync is called
#   $2 primary_host         host from which the file is fetched
#   $3 data_file            file to copy
#   $4 varname_time_waited  name of the variable receiving the seconds
# The helper locals carry the function name as prefix: a local named like
# the caller's result variable (e.g. time_waited) would shadow it and the
# result would never reach the caller.
function perftest_via_rsync
{
    local secondary_host=$1 primary_host=$2 data_file=$3
    local varname_time_waited=$4
    local perftest_via_rsync_start=$(date +'%s')
    local perftest_via_rsync_end
    perftest_do_rsync $secondary_host $primary_host $data_file
    perftest_via_rsync_end=$(date +'%s')
    printf -v "$varname_time_waited" '%s' \
           "$((perftest_via_rsync_end - perftest_via_rsync_start))"
}
# Call rsync on the secondary host to fetch a file from the primary host
# (as root via ssh) to the same path.
#   $1 secondary_host  $2 primary_host  $3 data_file
# Terminates via lib_exit if the rsync call fails.
function perftest_do_rsync
{
    local secondary_host=$1 primary_host=$2 data_file=$3
    local rsync_cmd="rsync -av -e ssh root@$primary_host:$data_file $data_file"

    lib_vmsg " syncing $primary_host:$data_file to $secondary_host"
    if ! lib_remote_idfile $secondary_host "$rsync_cmd"; then
        lib_exit 1
    fi
}
# Mount the data device and start the writer script for a resource.
#   $1 varname_writer_start   name of the variable receiving the start time
#                             (seconds, taken after the mount)
#   $2 varname_writer_pid     passed through to
#   $3 varname_writer_script  lib_rw_start_writing_data_device
#   $4 res                    resource name
function perftest_start_parallel_writer
{
    [ $# -eq 4 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local varname_writer_start=$1 varname_writer_pid=$2 varname_writer_script=$3
    local res=$4

    mount_mount_data_device
    printf -v "$varname_writer_start" '%s' "$(date +'%s')"
    lib_rw_start_writing_data_device $varname_writer_pid \
                                     $varname_writer_script \
                                     0 0 $res
}
# Stop a writer started by perftest_start_parallel_writer, compute its
# write rate per minute, check the rate via perftest_check_result (result
# type "loops_per_min", action "write_while_<action>") and unmount the
# data devices.
#   $1 host           host passed to perftest_check_result
#   $2 writer_script  script to stop
#   $3 writer_start   start time of the writer in seconds
#   $4 action         action during which the writer ran
#   $5 no_resources   number of resources
#   $6 subcase_id     subcase id passed to perftest_check_result
function perftest_finish_parallel_writer
{
    [ $# -eq 6 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local host=$1 writer_script=$2 writer_start=$3
    local action=$4 no_resources=$5 subcase_id="$6"
    local write_count writer_rate writer_end
    # source file, function and line of the caller, for the log message
    local caller="${BASH_SOURCE[1]}:${FUNCNAME[1]}:${BASH_LINENO[0]}"

    lib_rw_stop_writing_data_device $writer_script "write_count"
    writer_end=$(date +'%s')
    writer_rate=$(perftest_get_rate_per_minute $writer_start $writer_end \
                                               $write_count)

    main_error_recovery_functions["lib_rw_stop_scripts"]=

    lib_vmsg " $caller: do_write rate: $writer_rate"
    perftest_check_result $writer_rate $host write_while_$action 0 \
                          "loops_per_min" $no_resources "$subcase_id" -1
    mount_umount_data_device_all
}
# Measure a sync done by mars: the resource is invalidated on the
# secondary, brought up again, and the time until the "syncstatus" link
# stops changing is checked via perftest_check_result together with the
# measured network throughput.
#   $1 primary_host  $2 secondary_host  $3 res  $4 dev
#   $5 parallel_writer  1 = a writer writes to the data device during sync
#   $6 result_type  $7 no_resources  $8 subcase_id
# With a parallel writer the writer result is checked and all resources
# are recreated afterwards; without one the checksums of dev on both hosts
# are compared.
function perftest_via_mars_sync
{
    [ $# -eq 8 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local primary_host=$1 secondary_host=$2 res=$3 dev=$4
    local parallel_writer=$5 result_type=$6 no_resources=$7 subcase_id=$8
    local synctime net_throughput
    local start_sync writer_pid writer_script writer_start

    if ! marsadm_do_cmd $secondary_host "invalidate" $res; then
        lib_exit 1
    fi

    if [ $parallel_writer -eq 1 ]; then
        writer_start=$(date +'%s')
        perftest_start_parallel_writer "writer_start" "writer_pid" \
                                       "writer_script" $res
    fi

    if ! marsadm_do_cmd $secondary_host "up" $res; then
        lib_exit 1
    fi

    lib_wait_until_action_stops "syncstatus" $secondary_host $res \
                                $perftest_maxtime_sync \
                                $perftest_time_constant_sync \
                                "synctime" 1 "net_throughput"

    perftest_check_result $synctime $secondary_host $perftest_action \
                          $parallel_writer "time" $no_resources \
                          "$subcase_id" $net_throughput

    if [ $parallel_writer -ne 1 ]; then
        lib_rw_compare_checksums $primary_host $secondary_host $dev 0 "" ""
        return
    fi

    perftest_finish_parallel_writer $primary_host $writer_script \
                                    $writer_start $perftest_action \
                                    $no_resources "$subcase_id"
    lib_vmsg " recreating all resources"
    resource_recreate_all
}
# Create a data file on the primary host by copying a device with dd and
# transfer it to the secondary host via perftest_do_rsync. Finally the
# file is listed on both hosts.
#   $1 primary_host  $2 secondary_host
#   $3 dev        device to read from
#   $4 dev_size   size; multiplied by 1024 * 1024 to get the number of
#                 1 KB blocks to copy
#   $5 data_file  file to create
function perftest_generate_data_file
{
    [ $# -eq 5 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local primary_host=$1 secondary_host=$2 dev=$3 dev_size=$4
    local data_file=$5 file_size_in_kb=$(($dev_size * 1024 * 1024))
    local host
    local dd_cmd="dd if=$dev of=$data_file bs=1024 count=$file_size_in_kb"

    lib_vmsg " generating file $primary_host:$data_file ($file_size_in_kb KB) from $dev"
    if ! lib_remote_idfile $primary_host "$dd_cmd"; then
        lib_exit 1
    fi

    perftest_do_rsync $secondary_host $primary_host $data_file

    for host in $primary_host $secondary_host; do
        if ! lib_remote_idfile $host "ls -l --full-time $data_file"; then
            lib_exit 1
        fi
    done
}
# Write the sync mode flag to $perftest_sync_mode_proc_file on all given
# hosts: 1 if perftest_sync_mode is "fast_sync", 0 otherwise.
#   $@ hosts
# Terminates via lib_exit if the flag cannot be written on a host.
function perftest_sysctrl_sync_modus
{
    local hosts="$@" host
    local mars_fast_sync_mode

    case "$perftest_sync_mode" in
        fast_sync) mars_fast_sync_mode=1 ;;
        *)         mars_fast_sync_mode=0 ;;
    esac

    for host in $hosts; do
        lib_vmsg " setting fast sync mode to $mars_fast_sync_mode on $host"
        if ! lib_remote_idfile $host \
                "echo $mars_fast_sync_mode > $perftest_sync_mode_proc_file"
        then
            lib_exit 1
        fi
    done
}
# Overwrite a device at equally spaced offsets with a recognizable pattern
# (the output of "yes :<offset>:").
#   $1 host                host on which dd is called
#   $2 dev                 device to patch
#   $3 device size; multiplied by 1024 * 1024 to get the size in KB
#   $4 patch_length_in_kb  length of one patch in KB
#   $5 no_of_patches       the distance between two patches is
#                          <size in KB> / no_of_patches
# Terminates via lib_exit if the distance between two patches would be
# less than 1 KB (this includes no_of_patches < 1) or if dd fails.
function perftest_patch_data_device
{
    [ $# -eq 5 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local host=$1 dev=$2 dev_size_in_kb=$(($3 * 1024 * 1024))
    local patch_length_in_kb=$4 no_of_patches=$5
    local offset=0 bs=1024 step

    if [ "$no_of_patches" -lt 1 ]; then
        lib_exit 1 "invalid number of patches $no_of_patches"
    fi
    # a step of 0 would never advance the offset
    step=$((dev_size_in_kb / no_of_patches))
    if [ $step -lt 1 ]; then
        lib_exit 1 "number of patches $no_of_patches too big for $dev_size_in_kb KB"
    fi
    while [ $offset -lt $((dev_size_in_kb - patch_length_in_kb)) ]; do
        lib_vmsg " patching $dev at $offset KB with $patch_length_in_kb KB"
        # seek= positions the *output*; skip= would only discard input
        # blocks and every patch would be written to the start of the device
        lib_remote_idfile $host \
            "yes :$offset: | dd of=$dev bs=$bs seek=$offset count=$patch_length_in_kb" || lib_exit 1
        offset=$((offset + step))
    done
}
# Preparation for action "sync": wait for the end of the initial sync of
# the resource, then either generate the data file used by rsync
# (perftest_sync_mode = "rsync") or set the sync mode flag on both hosts.
#   $1 primary_host  $2 secondary_host  $3 res
function perftest_prepare_sync
{
    lib_vmsg " executing ${FUNCNAME[0]}"
    local primary_host=$1 secondary_host=$2 res=$3
    local dev=$(lv_config_get_lv_device $res)
    local dev_size=$(lv_config_get_lv_size $res)
    local time_waited

    lib_wait_for_initial_end_of_sync $secondary_host $res \
                                     $resource_maxtime_initial_sync \
                                     $resource_time_constant_initial_sync \
                                     "time_waited"
    lib_vmsg " ${FUNCNAME[0]}: sync time: $time_waited"

    if [ "$perftest_sync_mode" = "rsync" ]; then
        perftest_generate_data_file $primary_host $secondary_host $dev \
                                    $dev_size $perftest_data_file
    else
        perftest_sysctrl_sync_modus $primary_host $secondary_host
    fi
}
# Preparation for action "apply": with replay paused on the secondary,
# data is written on the primary and fetched by the secondary; afterwards
# the secondary is disconnected, so that perftest_do_apply only has to
# resume the replay.
#   $1 primary_host  $2 secondary_host  $3 res
#   $4 parallel_writer  $5 result_type  $6 no_resources
# Terminates via lib_exit if no required result is configured for this
# case, if a marsadm command fails or if the length of the fetched logfile
# (rounded to GB) differs from $perftest_logfile_size_in_gb.
function perftest_prepare_apply
{
    [ $# -eq 6 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    lib_vmsg " executing ${FUNCNAME[0]}"
    local primary_host=$1 secondary_host=$2 res=$3
    local parallel_writer=$4 result_type=$5 no_resources=$6
    local data_dev=$(resource_get_data_device $res)
    local logfile length_logfile time_waited net_throughput
    local act_logfile_size_in_gb

    perftest_check_and_get_required_result $secondary_host "apply" \
                    $parallel_writer $result_type \
                    $no_resources $perftest_logfile_size_in_gb >/dev/null \
                                                                || lib_exit 1

    perftest_prepare_resource $res $secondary_host

    marsadm_do_cmd $secondary_host "pause-replay" $res || lib_exit 1

    perftest_write_to_device $primary_host $res $data_dev $perftest_data_in_gb_to_write

    lib_wait_until_fetch_stops "perftest" $secondary_host $primary_host $res \
                               "logfile" "length_logfile" "time_waited" 0 \
                               "net_throughput"
    lib_vmsg " ${FUNCNAME[0]}: fetch time: $time_waited"

    act_logfile_size_in_gb=$(lib_round_to_gb $length_logfile)
    # plain $perftest_logfile_size_in_gb: "$$name" would expand to the pid
    # of the shell followed by the literal name, which makes "[" fail with
    # an error and the check pass unnoticed. The negated quoted form also
    # treats a non-numeric operand as a mismatch.
    if ! [ "$act_logfile_size_in_gb" -eq "$perftest_logfile_size_in_gb" ] \
                                                            2>/dev/null
    then
        lib_exit 1 "req. logfile length = $perftest_logfile_size_in_gb != $act_logfile_size_in_gb = act. logfile length"
    fi

    marsadm_do_cmd $secondary_host "disconnect" $res || lib_exit 1
}
# Action "apply": resume the replay on the secondary and measure the time
# until the "replay" link stops changing. The time is checked via
# perftest_check_result. With parallel_writer = 1 a writer runs on the
# data device during the apply and its result is checked afterwards.
#   $1 primary_host  $2 secondary_host  $3 res
#   $4 parallel_writer  $5 result_type  $6 no_resources
function perftest_do_apply
{
    [ $# -eq 6 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    lib_vmsg " executing ${FUNCNAME[0]}"
    local primary_host=$1 secondary_host=$2 res=$3
    local parallel_writer=$4 result_type=$5 no_resources=$6
    local time_waited net_throughput
    local writer_start writer_pid writer_script

    if [ $parallel_writer -eq 1 ]; then
        writer_start=$(date +'%s')
        perftest_start_parallel_writer "writer_start" "writer_pid" \
                                       "writer_script" $res
    fi

    if ! marsadm_do_cmd $secondary_host "resume-replay" $res; then
        lib_exit 1
    fi

    lib_wait_until_action_stops "replay" $secondary_host $res \
                                $perftest_maxtime_apply \
                                $perftest_time_constant_apply \
                                "time_waited" 0 "net_throughput"
    lib_vmsg " ${FUNCNAME[0]}: do_$perftest_action time: $time_waited"

    perftest_check_result $time_waited $secondary_host $perftest_action \
                          $parallel_writer $result_type $no_resources \
                          $perftest_logfile_size_in_gb -1

    if [ $parallel_writer -ne 1 ]; then
        return
    fi
    perftest_finish_parallel_writer $primary_host $writer_script \
                                    $writer_start $perftest_action \
                                    $no_resources $perftest_logfile_size_in_gb
}
# Set up the resource from scratch: the resource directories are removed
# on all hosts, resource_run is executed and the disk state of the
# secondary must become "Uptodate" within
# $perftest_maxtime_state_constant.
#   $1 res  $2 secondary_host
function perftest_prepare_resource
{
    local res=$1 secondary_host=$2

    resource_rm_resource_dir_all $res
    resource_run
    if ! marsview_wait_for_state $secondary_host $res "disk" "Uptodate" \
                                 $perftest_maxtime_state_constant
    then
        lib_exit 1
    fi
}
# Preparation for perftest_action "fetch" (called by perftest_run as
# perftest_prepare_<action>); all arguments are passed unchanged to
# perftest_prepare_fetch_or_fetch_and_apply.
function perftest_prepare_fetch
{
perftest_prepare_fetch_or_fetch_and_apply "$@"
}
# Preparation for perftest_action "fetch_and_apply" (called by perftest_run
# as perftest_prepare_<action>); all arguments are passed unchanged to
# perftest_prepare_fetch_or_fetch_and_apply.
function perftest_prepare_fetch_and_apply
{
perftest_prepare_fetch_or_fetch_and_apply "$@"
}
# Execution of perftest_action "fetch" (called by perftest_run as
# perftest_do_<action>); all arguments are passed unchanged to
# perftest_do_fetch_or_fetch_and_apply.
function perftest_do_fetch
{
perftest_do_fetch_or_fetch_and_apply "$@"
}
# Execution of perftest_action "fetch_and_apply" (called by perftest_run as
# perftest_do_<action>); all arguments are passed unchanged to
# perftest_do_fetch_or_fetch_and_apply.
function perftest_do_fetch_and_apply
{
perftest_do_fetch_or_fetch_and_apply "$@"
}
function perftest_prepare_fetch_or_fetch_and_apply
{
    # Common preparation for the fetch and fetch_and_apply tests:
    #   1. make sure a required result is configured for this test case,
    #   2. bring the resource up (perftest_prepare_resource),
    #   3. pause replay and disconnect the secondary,
    #   4. write perftest_data_in_gb_to_write GB on the primary and verify
    #      the length of its last logfile.
    #
    # Arguments: primary_host secondary_host res parallel_writer
    #            result_type no_resources
    if [ $# -ne 6 ]; then
        lib_exit 1 "wrong number $# of arguments (args = $*)"
    fi
    lib_vmsg " executing ${FUNCNAME[0]}"

    local primary_host=$1
    local secondary_host=$2
    local res=$3
    local parallel_writer=$4
    local result_type=$5
    local no_resources=$6
    local data_dev=$(resource_get_data_device $res)
    local logfile length_logfile

    # Only the existence of the required result matters here.
    if ! perftest_check_and_get_required_result \
            $secondary_host $perftest_action \
            $parallel_writer $result_type \
            $no_resources $perftest_logfile_size_in_gb >/dev/null
    then
        lib_exit 1
    fi

    perftest_prepare_resource $res $secondary_host

    local marsadm_cmd
    for marsadm_cmd in "pause-replay" "disconnect"; do
        marsadm_do_cmd $secondary_host "$marsadm_cmd" $res || lib_exit 1
    done

    perftest_write_to_device $primary_host $res $data_dev \
                             $perftest_data_in_gb_to_write
    perftest_check_length_last_logfile $primary_host $res $primary_host \
                                       $perftest_logfile_size_in_gb
}
function perftest_get_length_last_logfile
{
    # Print the length of the last logfile of resource $2 (res) written by
    # $3 (primary_host), as found on $1 (host).
    # Exits via lib_exit if the logfile name or its length cannot be
    # determined.
    local host=$1 res=$2 primary_host=$3
    # logfile must be local as well: callers keep their own variable of that
    # name (filled by name through lib_wait_until_fetch_stops) which would
    # otherwise be overwritten when this function is not run in a subshell.
    local logfile length_logfile
    logfile=$(marsadm_get_last_logfile $host $res $primary_host) || lib_exit 1
    length_logfile=$(file_handling_get_file_length $host $logfile) || lib_exit 1
    echo $length_logfile
}
function perftest_check_length_last_logfile
{
    # Verify that the last logfile of resource $2 (written by $3) on host $1
    # has, rounded to GB, exactly the required size $4. Exits via lib_exit on
    # a mismatch.
    if [ $# -ne 4 ]; then
        lib_exit 1 "wrong number $# of arguments (args = $*)"
    fi
    local host=$1
    local res=$2
    local primary_host=$3
    local logfile_size_in_gb_req=$4
    local length_logfile length_in_gb_act

    length_logfile=$(perftest_get_length_last_logfile $host $res $primary_host)
    length_in_gb_act=$(lib_round_to_gb $length_logfile)
    if [ $length_in_gb_act -ne $logfile_size_in_gb_req ]; then
        lib_exit 1 "req. logfile length = $logfile_size_in_gb_req != $length_in_gb_act = act. logfile length"
    fi
}
## Common run step of the fetch and fetch_and_apply tests.
## Reconnects the secondary (and resumes replay if $perftest_action is
## "fetch_and_apply"), measures the time the fetch takes and passes it to
## perftest_check_result.
## Arguments: primary_host secondary_host res parallel_writer result_type
##            no_resources
## Globals read: perftest_action, perftest_maxtime_fetch,
##               perftest_wait_for_apply_to_stop_after_fetch_end,
##               perftest_logfile_size_in_gb
function perftest_do_fetch_or_fetch_and_apply
{
[ $# -eq 6 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
lib_vmsg " executing ${FUNCNAME[0]}"
local primary_host=$1 secondary_host=$2 res=$3
local parallel_writer=$4 result_type=$5 no_resources=$6
# logfile, length_logfile, time_waited, net_throughput and the writer_*
# variables are filled in by callees which receive their names as arguments
local logfile length_logfile time_waited net_throughput
local last_logfile_primary last_logfile_length_primary
local writer_start writer_pid writer_script
# With parallel writing the fetch never stops. Therefore we remember the
# last logfile (and its length) present on the primary before the writer
# starts and measure the time until exactly that logfile has been fetched.
if [ $parallel_writer -eq 1 ]; then
last_logfile_primary=$(marsadm_get_last_logfile $primary_host $res \
$primary_host) || lib_exit 1
last_logfile_length_primary=$(file_handling_get_file_length \
$primary_host $last_logfile_primary) \
|| lib_exit 1
lib_vmsg " last logfile $primary_host:$last_logfile_primary has length $last_logfile_length_primary"
writer_start=$(date +'%s')
perftest_start_parallel_writer "writer_start" "writer_pid" \
"writer_script" $res
fi
# reconnecting starts the fetch on the secondary
marsadm_do_cmd $secondary_host "connect" $res || lib_exit 1
if [ $perftest_action = "fetch_and_apply" ]; then
marsadm_do_cmd $secondary_host "resume-replay" $res || lib_exit 1
fi
if [ $parallel_writer -eq 0 ]; then
# no writer: wait until the fetch stops, then compare the length of the
# fetched logfile on both hosts
lib_wait_until_fetch_stops "perftest" $secondary_host $primary_host \
$res "logfile" "length_logfile" \
"time_waited" 1 "net_throughput"
file_handling_check_equality_of_file_lengths $logfile $primary_host \
$secondary_host \
$length_logfile
else
# writer running: wait until the logfile remembered above has reached the
# remembered length on the secondary
lib_wait_until_logfile_has_length $secondary_host \
$last_logfile_primary \
$last_logfile_length_primary \
"time_waited" \
$perftest_maxtime_fetch 1 \
"net_throughput"
if [ $perftest_action = "fetch_and_apply" ]; then
lib_wait_until_apply_has_reached_length $secondary_host $res $last_logfile_primary \
$last_logfile_length_primary \
$perftest_wait_for_apply_to_stop_after_fetch_end
fi
fi
lib_vmsg " ${FUNCNAME[0]}: do_fetch time: $time_waited"
perftest_check_result $time_waited $secondary_host $perftest_action \
$parallel_writer $result_type $no_resources \
$perftest_logfile_size_in_gb $net_throughput
if [ $parallel_writer -eq 1 ]; then
perftest_finish_parallel_writer $primary_host $writer_script \
$writer_start $perftest_action \
$no_resources $perftest_logfile_size_in_gb
fi
}
function perftest_get_result_index
{
    # Print the key under which the required result of a test case is stored
    # in perftest_required_result_list: the six arguments
    #   host action parallel_writer result_type no_resources subcase_id
    # joined by commas.
    if [ $# -ne 6 ]; then
        lib_exit 1 "wrong number $# of arguments (args = $*)"
    fi
    printf '%s,%s,%s,%s,%s,%s\n' "$1" "$2" "$3" "$4" "$5" "$6"
}
function perftest_check_and_get_required_result
{
    # Print the entry of perftest_required_result_list belonging to the test
    # case described by the arguments
    #   host action parallel_writer result_type no_resources subcase_id
    # Exits via lib_exit if no (non-empty) entry exists.
    if [ $# -ne 6 ]; then
        lib_exit 1 "wrong number $# of arguments (args = $*)"
    fi
    local host=$1
    local action=$2
    local parallel_writer=$3
    local result_type=$4
    local no_resources=$5
    local subcase_id="$6"
    local result_index required_result

    result_index="$(perftest_get_result_index $host $action \
                                              $parallel_writer $result_type \
                                              $no_resources $subcase_id)"
    required_result=${perftest_required_result_list[$result_index]}
    [ -n "$required_result" ] || \
        lib_exit 1 "no value in perftest_required_result_list for index $result_index"
    echo $required_result
}
function perftest_check_result
{
    # Compare an actual result with the required result of a test case.
    #
    # Arguments:
    #   $1 act_result   measured value
    #   $2..$7          host action parallel_writer result_type no_resources
    #                   subcase_id (select the required result)
    #   $8              net_throughput (only reported)
    # The required result has the form "<value>,<max deviation in percent>".
    # If the deviation of act_result exceeds the maximum in either direction,
    # a line tagged with $perftest_errortag_result_out_of_bounds is written
    # to stderr; the function itself does not exit in that case.
    if [ $# -ne 8 ]; then
        lib_exit 1 "wrong number $# of arguments (args = $*)"
    fi
    local act_result=$1
    local host=$2 action=$3 parallel_writer=$4 result_type=$5 no_resources=$6
    local subcase_id="$7" net_throughput=$8
    local req_result req_result_string act_diff_percentage max_diff_percentage
    local result_index

    if ! req_result_string=$(perftest_check_and_get_required_result \
                                 $host $action $parallel_writer $result_type \
                                 $no_resources $subcase_id)
    then
        lib_exit 1
    fi
    # split "<value>,<max diff>" at the comma
    req_result=${req_result_string%,*}
    max_diff_percentage=${req_result_string#*,}
    act_diff_percentage=$(( (($act_result - $req_result) * 100 ) / $req_result ))

    result_index="$(perftest_get_result_index \
                        $host $action $parallel_writer \
                        $result_type \
                        $no_resources \
                        $subcase_id)"
    lib_vmsg " checking result $act_result for index $result_index, req. = $req_result max_diff = $max_diff_percentage%, act. diff percentage = $act_diff_percentage%, net rate = $net_throughput"

    if [ $act_diff_percentage -lt -$max_diff_percentage ] \
       || [ $act_diff_percentage -gt $max_diff_percentage ]
    then
        echo "$perftest_errortag_result_out_of_bounds: act. result $act_result differs more than $max_diff_percentage% from $req_result" >&2
    fi
}
function perftest_write_to_device
{
    # Write $4 (data_in_gb_to_write) GB to device $3 (dev) on host $1.
    # $2 (res) is accepted for interface compatibility but not used.
    #
    # Per GB one remote pipeline "yes <pattern> | dd ..." is started with
    # bs=1024 and count=1024*1024; the pattern is the loop counter, zero
    # padded to 1024 digits.
    # NOTE(review): dd reads from a pipe here, so short reads could make it
    # write less than bs*count bytes - confirm whether iflag=fullblock is
    # wanted.
    local host=$1 res=$2 dev=$3 data_in_gb_to_write=$4
    local bs=1024 dd_count=$((1024*1024))
    # The loop counter must be local, otherwise a loop variable i of a
    # (direct or indirect) caller is overwritten.
    local i
    lib_vmsg " writing $data_in_gb_to_write GB to $host:$dev"
    for i in $(seq 1 1 $data_in_gb_to_write); do
        # \$( ... ) is escaped so that printf is executed on the remote host
        lib_remote_idfile $host \
            "yes \$(printf \"%0.1024d\" $i) | dd of=$dev bs=$bs count=$dd_count" \
            || lib_exit 1
    done
}
function perftest_get_sync_subcase_id
{
    # Print the subcase id of a sync test:
    #   <no_of_patches>:<patch_length_in_kb>:<sync_mode>
    local no_of_patches=$1
    local patch_length_in_kb=$2
    local sync_mode=$3
    printf '%s:%s:%s\n' "$no_of_patches" "$patch_length_in_kb" "$sync_mode"
}
## Run step of the sync test.
## For every entry i of perftest_patch_length_list (paired with entry i of
## perftest_number_of_patches_list) the data on the secondary is patched and
## then synced again, either by mars or by rsync, depending on
## $perftest_sync_mode.
## Arguments: primary_host secondary_host res parallel_writer result_type
##            no_resources
function perftest_do_sync
{
[ $# -eq 6 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
lib_vmsg " executing ${FUNCNAME[0]}"
local primary_host=$1 secondary_host=$2 res=$3
local parallel_writer=$4 result_type=$5 no_resources=$6
local dev=$(lv_config_get_lv_device $res)
local dev_size=$(lv_config_get_lv_size $res)
local i patch_length_in_kb no_of_patches
local dev_to_patch
# with mars sync the underlying device is patched, with rsync the data file
if [ "$perftest_sync_mode" != "rsync" ]; then
dev_to_patch=$dev
else
dev_to_patch=$perftest_data_file
fi
for i in ${!perftest_patch_length_list[@]}; do
no_of_patches=${perftest_number_of_patches_list[$i]}
patch_length_in_kb=${perftest_patch_length_list[$i]}
local subcase_id="$(perftest_get_sync_subcase_id $no_of_patches \
$patch_length_in_kb \
$perftest_sync_mode)"
# only checks that a required result is configured for this subcase
# (output is discarded)
perftest_check_and_get_required_result $secondary_host "sync" \
$parallel_writer $result_type \
$no_resources "$subcase_id" >/dev/null \
|| lib_exit 1
lib_vmsg " patchlength $patch_length_in_kb, no of patches $no_of_patches"
# mars sync: the resource is taken down on the secondary before its device
# is patched
if [ "$perftest_sync_mode" != "rsync" ]; then
marsadm_do_cmd $secondary_host "down" $res || lib_exit 1
fi
perftest_patch_data_device $secondary_host $dev_to_patch $dev_size \
$patch_length_in_kb $no_of_patches
if [ "$perftest_sync_mode" != "rsync" ]; then
perftest_via_mars_sync $primary_host $secondary_host $res $dev \
$parallel_writer $result_type $no_resources \
"$subcase_id"
else
# NOTE(review): "synctime" looks like a variable name handed to
# perftest_via_rsync; it is not declared local in this function - confirm
perftest_via_rsync $secondary_host $primary_host \
$perftest_data_file "synctime" "$subcase_id"
fi
done
# rsync mode: remove the data file on both hosts
if [ "$perftest_sync_mode" == "rsync" ]; then
perftest_remove_data_file $perftest_data_file $primary_host \
$secondary_host
fi
}
function perftest_get_write_subcase_id
{
    # Print the subcase id of a write test:
    #   <perftest_write_time>:<lv size of first resource>:<part of device
    #   size written per loop>[:<perftest_device_division>]
    # The last component is only appended if
    # perftest_division_mars_device_data_device is 1.
    local lv_size division_suffix=""
    lv_size=$(lv_config_get_lv_size ${resource_name_list[0]})
    if [ $perftest_division_mars_device_data_device -eq 1 ]; then
        division_suffix=":$perftest_device_division"
    fi
    echo "$perftest_write_time:$lv_size:$lib_rw_part_of_device_size_written_per_loop$division_suffix"
}
function perftest_prepare_write
{
    # Preparation of the write test: checks that perftest_write_time is set
    # and that a required result is configured, brings the resource up,
    # mounts the data device and - if
    # perftest_division_mars_device_data_device is 1 - switches the bbu cache
    # on the primary.
    #
    # Arguments: primary_host secondary_host res parallel_writer
    #            result_type no_resources
    if [ $# -ne 6 ]; then
        lib_exit 1 "wrong number $# of arguments (args = $*)"
    fi
    lib_vmsg " executing ${FUNCNAME[0]}"

    local primary_host=$1
    local secondary_host=$2
    local res=$3
    local parallel_writer=$4
    local result_type=$5
    local no_resources=$6
    local data_dev=$(resource_get_data_device $res)
    local logfile length_logfile time_waited

    [ -n "$perftest_write_time" ] || lib_exit 1 "perftest_write_time not set"

    # Only the existence of the required result matters here.
    if ! perftest_check_and_get_required_result \
            $primary_host "write" \
            $parallel_writer $result_type \
            $no_resources \
            $(perftest_get_write_subcase_id) >/dev/null
    then
        lib_exit 1
    fi

    perftest_prepare_resource $res $secondary_host
    mount_mount_data_device
    if [ $perftest_division_mars_device_data_device -eq 1 ]; then
        perftest_switch_bbu_cache $primary_host
    fi
}
function perftest_switch_bbu_cache
{
    # Depending on $perftest_device_division run the commands of
    # perftest_bbu_disable_cmd_list or perftest_bbu_enable_cmd_list on host
    # $1 (nothing for "same_controller"), then always run the commands of
    # perftest_bbu_show_cmd_list. Any other value of
    # perftest_device_division is an error.
    local host=$1 cmd
    # name of the command list to execute, in the form usable for ${!...}
    local cmd_list_ref=""

    case $perftest_device_division in
        separated_and_mars_dev_without_bbu_cache)
            lib_vmsg " disabling bbu cache on $host"
            cmd_list_ref='perftest_bbu_disable_cmd_list[@]'
            ;;
        separated_and_mars_dev_with_bbu_cache)
            lib_vmsg " enabling bbu cache on $host"
            cmd_list_ref='perftest_bbu_enable_cmd_list[@]'
            ;;
        same_controller)
            ;;
        *)
            lib_exit 1 "invalid value $perftest_device_division for perftest_device_division in"
            ;;
    esac

    if [ -n "$cmd_list_ref" ]; then
        for cmd in "${!cmd_list_ref}"; do
            lib_remote_idfile $host $cmd || lib_exit 1
        done
    fi

    for cmd in "${perftest_bbu_show_cmd_list[@]}"; do
        lib_remote_idfile $host $cmd || lib_exit 1
    done
}

View File

@ -1,218 +0,0 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
function synctest_check_variables
{
    # synctest_patch_length_list and synctest_number_of_patches_list are used
    # pairwise, so they must have the same number of elements.
    local count_lengths=${#synctest_patch_length_list[@]}
    local count_numbers=${#synctest_number_of_patches_list[@]}
    if [ $count_lengths -ne $count_numbers ]; then
        lib_exit 1 " different array lengths $count_lengths != $count_numbers"
    fi
}
function synctest_run
{
    # Sync test: after the initial sync has finished, the data on the
    # secondary is patched once per entry of synctest_patch_length_list /
    # synctest_number_of_patches_list and synced again - by mars if
    # synctest_use_mars_sync is 1, else by rsync of a data file. The sync
    # times are printed to stdout.
    local primary_host=${main_host_list[0]}
    local secondary_host=${main_host_list[1]}
    local res=${resource_name_list[0]}
    local dev=$(lv_config_get_lv_device $res)
    local dev_size=$(lv_config_get_lv_size $res)
    # patch_length_in_kb is the name used below (the declaration used to say
    # patch_length_kb, which made the variable a global)
    local synctime i patch_length_in_kb no_of_patches
    local synctimes dev_to_patch time_waited

    synctest_check_variables

    # time_waited is set by name
    lib_wait_for_initial_end_of_sync $secondary_host $res \
                                     $resource_maxtime_initial_sync \
                                     $resource_time_constant_initial_sync \
                                     "time_waited"
    lib_vmsg " ${FUNCNAME[0]}: sync time: $time_waited"

    if [ $synctest_use_mars_sync -eq 1 ]; then
        dev_to_patch=$dev
        synctest_set_sync_modus $primary_host $secondary_host
    else
        # rsync works on a file copy of the device contents
        dev_to_patch=$synctest_data_file
        synctest_generate_data_file $primary_host $secondary_host $dev \
                                    $dev_size $synctest_data_file
    fi

    if [ $synctest_parallel_writer -eq 1 ]; then
        mount_mount_data_device
        synctest_determine_write_rate $primary_host
    fi

    for i in ${!synctest_patch_length_list[@]}; do
        patch_length_in_kb=${synctest_patch_length_list[$i]}
        no_of_patches=${synctest_number_of_patches_list[$i]}
        lib_vmsg " patchlength $patch_length_in_kb, no of patches $no_of_patches"
        # mars sync: take the resource down on the secondary before patching
        if [ $synctest_use_mars_sync -eq 1 ]; then
            marsadm_do_cmd $secondary_host "down" $res || lib_exit 1
        fi
        synctest_patch_data_device $secondary_host $dev_to_patch $dev_size \
                                   $patch_length_in_kb $no_of_patches
        # synctime is set by name
        if [ $synctest_use_mars_sync -eq 1 ]; then
            synctest_via_mars_sync $primary_host $secondary_host $res $dev \
                                   "synctime"
        else
            synctest_via_rsync $secondary_host $primary_host \
                               $synctest_data_file "synctime"
        fi
        echo " synctime = $synctime"
        synctimes[$i]=$synctime
    done
    echo "use_mars_sync=$synctest_use_mars_sync synctimes = ${synctimes[@]}"

    if [ $synctest_use_mars_sync -eq 0 ]; then
        synctest_remove_data_file $synctest_data_file $primary_host \
                                  $secondary_host
    fi
}
function synctest_remove_data_file
{
    # Remove file $1 (data_file) on every host given as $2 ...
    local data_file=$1 host
    for host in ${*:2}; do
        lib_vmsg " removing $host:$data_file"
        if ! lib_remote_idfile $host "rm -f $data_file"; then
            lib_exit 1
        fi
    done
}
function synctest_determine_write_rate
{
    # Let the writer run for $synctest_write_time seconds on the first
    # resource and print the resulting write count and rate per minute.
    # $1 (host) is accepted but not used.
    local host=$1
    local writer_pid writer_script write_count
    local res=${resource_name_list[0]}
    local rate_per_min

    # writer_pid, writer_script and write_count are set by name
    lib_rw_start_writing_data_device "writer_pid" "writer_script" 0 0 $res
    sleep $synctest_write_time
    lib_rw_stop_writing_data_device $writer_script "write_count"

    rate_per_min=$(( (60 * $write_count) / $synctest_write_time ))
    echo "write_count:$write_count, time_write=$synctest_write_time, rate = $rate_per_min / min"
}
function synctest_via_rsync
{
    # Sync $3 (data_file) from $2 (primary_host) to $1 (secondary_host) with
    # rsync and store the elapsed time in seconds in the variable whose name
    # is given as $4.
    local secondary_host=$1 primary_host=$2 data_file=$3
    local varname_time_waited=$4
    # The helper locals carry a function specific prefix: a local named like
    # the caller's output variable (e.g. time_waited or start) would shadow
    # it and the result would never reach the caller.
    local _synctest_via_rsync_start _synctest_via_rsync_elapsed
    _synctest_via_rsync_start=$(date +'%s')
    synctest_do_rsync $secondary_host $primary_host $data_file
    _synctest_via_rsync_elapsed=$(($(date +'%s') - _synctest_via_rsync_start))
    # printf -v assigns by name without re-evaluating the value
    printf -v "$varname_time_waited" '%s' "$_synctest_via_rsync_elapsed"
}
function synctest_do_rsync
{
    # Run rsync on $1 (secondary_host) to copy $3 (data_file) from
    # root@$2 (primary_host) to the same path locally.
    local secondary_host=$1
    local primary_host=$2
    local data_file=$3
    local rsync_cmd="rsync -av -e ssh root@$primary_host:$data_file $data_file"

    lib_vmsg " syncing $primary_host:$data_file to $secondary_host"
    if ! lib_remote_idfile $secondary_host "$rsync_cmd"; then
        lib_exit 1
    fi
}
## Sync resource $3 on $2 (secondary_host) via mars and store the time the
## sync took in the variable whose name is given as $5.
## Sequence: invalidate on the secondary, optionally start a parallel writer,
## "up" on the secondary, wait until the "syncstatus" action stops.
## Arguments: primary_host secondary_host res dev varname_time_waited
function synctest_via_mars_sync
{
[ $# -eq 5 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local primary_host=$1 secondary_host=$2 res=$3 dev=$4
local varname_time_waited=$5
local start_sync writer_pid writer_script writer_start writer_time
marsadm_do_cmd $secondary_host "invalidate" $res || lib_exit 1
if [ $synctest_parallel_writer -eq 1 ]; then
writer_start=$(date +'%s')
# writer_pid and writer_script are set by name
lib_rw_start_writing_data_device "writer_pid" "writer_script" 0 0 $res
fi
marsadm_do_cmd $secondary_host "up" $res || lib_exit 1
# the waited time is stored in the caller's variable $varname_time_waited
lib_wait_until_action_stops "syncstatus" $secondary_host $res \
$synctest_maxtime_sync \
$synctest_time_constant_sync \
$varname_time_waited
if [ $synctest_parallel_writer -eq 1 ]; then
# NOTE(review): time_waited is declared local here; if the caller passes
# "time_waited" as $5 the value read below would presumably be the empty
# local - confirm
local write_count logfile length_logfile time_waited time_sync
lib_rw_stop_writing_data_device $writer_script "write_count"
writer_time=$(( $(date +'%s') - $writer_start ))
# read the variable named by $varname_time_waited
eval time_sync='$'$varname_time_waited
echo "write_count:$write_count, time_sync=$time_sync, rate = $(((60 * $write_count ) / $writer_time )) / min"
lib_vmsg " recreating all resources"
resource_recreate_all
else
# without a writer the devices of both hosts are compared by checksum
lib_rw_compare_checksums $primary_host $secondary_host $dev 0 "" ""
fi
}
function synctest_generate_data_file
{
    # Copy $4 (dev_size, multiplied by 1024*1024 to get KB) of device $3 into
    # file $5 on the primary, rsync the file to the secondary and list it on
    # both hosts.
    # Arguments: primary_host secondary_host dev dev_size data_file
    if [ $# -ne 5 ]; then
        lib_exit 1 "wrong number $# of arguments (args = $*)"
    fi
    local primary_host=$1
    local secondary_host=$2
    local dev=$3
    local dev_size=$4
    local data_file=$5 file_size_in_kb=$(($dev_size * 1024 * 1024))
    local host
    local dd_cmd="dd if=$dev of=$data_file bs=1024 count=$file_size_in_kb"

    lib_vmsg " generating file $primary_host:$data_file ($file_size_in_kb KB) from $dev"
    if ! lib_remote_idfile $primary_host "$dd_cmd"; then
        lib_exit 1
    fi

    synctest_do_rsync $secondary_host $primary_host $data_file

    for host in $primary_host $secondary_host; do
        if ! lib_remote_idfile $host "ls -l --full-time $data_file"; then
            lib_exit 1
        fi
    done
}
function synctest_set_sync_modus
{
    # Write $synctest_fast_sync to $synctest_sync_mode_proc_file on every
    # host given as argument.
    local host
    for host in $*; do
        lib_vmsg " setting fast sync mode to $synctest_fast_sync on $host"
        if ! lib_remote_idfile $host \
                "echo $synctest_fast_sync > $synctest_sync_mode_proc_file"
        then
            lib_exit 1
        fi
    done
}
function synctest_patch_data_device
{
    # Overwrite parts ("patches") of device or file $2 on host $1.
    #
    # Arguments:
    #   $1 host
    #   $2 dev                 device or file to patch
    #   $3 device size (multiplied by 1024*1024 to get KB)
    #   $4 patch_length_in_kb  length of one patch
    #   $5 no_of_patches       distance of two patches is size / no_of_patches
    # Patches are written at offset 0, step, 2*step, ... as long as the
    # offset is less than size - patch length.
    [ $# -eq 5 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
    local host=$1 dev=$2 dev_size_in_kb=$(($3 * 1024 * 1024))
    local patch_length_in_kb=$4 no_of_patches=$5
    local offset=0 bs=1024 step

    # A step of 0 (or a division by zero) would never advance the offset.
    if ! [ "$no_of_patches" -ge 1 ]; then
        lib_exit 1 "invalid number of patches $no_of_patches"
    fi
    step=$(($dev_size_in_kb / $no_of_patches))
    if ! [ $step -ge 1 ]; then
        lib_exit 1 "number of patches $no_of_patches exceeds device size $dev_size_in_kb KB"
    fi

    while [ $offset -lt $(($dev_size_in_kb - $patch_length_in_kb)) ]; do
        lib_vmsg " patching $dev at $offset KB with $patch_length_in_kb KB"
        # seek= positions in the *output* (skip= would only skip input
        # blocks and every patch would land at offset 0); conv=notrunc keeps
        # the rest of a regular file intact.
        lib_remote_idfile $host \
            "yes :$offset: | dd of=$dev bs=$bs seek=$offset count=$patch_length_in_kb conv=notrunc" || lib_exit 1
        offset=$(($offset + $step))
    done
}

View File

@ -25,7 +25,7 @@ function datadev_full_run
local secondary_host=${main_host_list[1]}
local res=${resource_name_list[0]}
local dev=$(lv_config_get_lv_device $res)
local data_dev=$(resource_get_name_data_device $res)
local data_dev=$(resource_get_data_device $res)
local data_dev_size_orig=$(lv_config_get_lv_size $res)
local data_dev_size_new=$(($data_dev_size_orig + $resize_size_to_add))
local mars_data_dev_size_new=$((data_dev_size_new \

View File

@ -0,0 +1,36 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
## This test provoked a memleak error in mars versions up to light0.1beta0.12
function memleak_run
{
    # Create the first resource on the primary, then run a remote command
    # which starts a dd writer on the data device, kills it after one second
    # and retries "marsadm secondary" (at most 20 times, one second apart)
    # until it succeeds.
    local primary_host=${main_host_list[0]}
    local res=${resource_name_list[0]}
    local data_dev=$(resource_get_data_device $res)
    # The remote command is assembled from two parts. Everything inside the
    # single quotes ($(jobs -p), $count, $maxcount) is meant to be evaluated
    # on the remote host.
    local writer_part='dd if=/dev/zero of='"$data_dev"' bs=4096 count=1000000 & sleep 1; kill -9 $(jobs -p)'
    local retry_part='maxcount=20; count=0; while test $count -lt $maxcount && ! marsadm secondary '"$res"' ; do date; echo $count; sleep 1; let count+=1; done'
    local cmd="$writer_part; $retry_part"

    resource_create $primary_host $res
    lib_vmsg " starting on $primary_host: $cmd"
    lib_remote_idfile $primary_host $cmd

    # wait a little for an error file to appear
    sleep 3
}

View File

@ -110,7 +110,8 @@ function lib_rw_start_writing_data_device
local res=$5
lib_rw_write_and_delete_loop ${main_host_list[0]} \
${resource_mount_point_list[$res]}/$lib_rw_file_to_write \
$(lv_config_get_lv_size ${resource_name_list[0]}) 4 \
$(lv_config_get_lv_size ${resource_name_list[0]}) \
$lib_rw_part_of_device_size_written_per_loop \
$varname_pid $varname_script $no_of_loops $sleeptime
}
@ -183,9 +184,15 @@ function lib_rw_mount_data_device
{
local host=$1 dev=$2 mount_point=$3
local res=${resource_name_list[0]}
if ! mount_is_device_mounted $host $dev; then
local mount_point
if ! mount_is_device_mounted $host $dev "mount_point"; then
mount_mount $host $dev $mount_point ${resource_fs_type_list[$res]} || \
lib_exit 1
fi
}
## Placeholder: currently only binds its four arguments to local variables
## and returns; no waiting is implemented yet.
## Arguments: secondary_host logfile_primary logfile_length_primary maxwait
function lib_wait_until_replay_has_exceeded
{
local secondary_host=$1 logfile_primary=$2 logfile_length_primary=$3 maxwait=$4
}

View File

@ -100,7 +100,7 @@ function lib_linktree_check_link
else
sleep 1
let waited+=1
lib_vmsg " waited $waited for $link to take $link_value_expected"
lib_vmsg " waited $waited for $link to become $link_value_expected"
continue
fi
else

View File

@ -28,7 +28,7 @@ function apply_fetch_run
local secondary_host=${main_host_list[1]}
local res=${resource_name_list[0]}
local writer_pid writer_script write_count
local logfile length_logfile time_waited
local logfile length_logfile time_waited net_throughput
lib_wait_for_initial_end_of_sync $secondary_host $res \
$resource_maxtime_initial_sync \
@ -44,7 +44,8 @@ function apply_fetch_run
lib_wait_until_action_stops "replay" $secondary_host $res \
$apply_fetch_maxtime_apply \
$apply_fetch_time_constant_apply "time_waited"
$apply_fetch_time_constant_apply \
"time_waited" 0 "net_throughput"
lib_vmsg " ${FUNCNAME[0]}: apply time: $time_waited"
marsview_check $secondary_host $res "disk" "Outdated\[.*A.*\]" \
@ -53,7 +54,8 @@ function apply_fetch_run
marsadm_pause_cmd "fetch" $secondary_host $res
lib_wait_until_fetch_stops "apply_fetch" $secondary_host $primary_host \
$res "logfile" "length_logfile" "time_waited"
$res "logfile" "length_logfile" "time_waited" 0 \
"net_throughput"
lib_vmsg " ${FUNCNAME[0]}: fetch time: $time_waited"
@ -66,10 +68,10 @@ function apply_fetch_run
lib_wait_until_action_stops "replay" $secondary_host $res \
$apply_fetch_maxtime_apply_after_disconnect \
$apply_fetch_time_constant_apply_after_disconnect \
"time_waited"
"time_waited" 0 "net_throughput"
lib_vmsg " ${FUNCNAME[0]}: apply time: $time_waited"
marsadm_check_warn_file_and_disk_state $secondary_host $res \
marsadm_check_warnings_and_disk_state $secondary_host $res \
"apply_stopped_after_disconnect"
marsview_check $secondary_host $res "repl" "-S-A-" || lib_exit 1
marsadm_do_cmd $secondary_host "connect" $res || lib_exit 1
@ -79,7 +81,8 @@ function apply_fetch_run
lib_wait_until_fetch_stops "apply_fetch" $secondary_host \
$primary_host $res "logfile" \
"length_logfile" "time_waited"
"length_logfile" "time_waited" 0 \
"net_throughput"
lib_vmsg " ${FUNCNAME[0]}: fetch time: $time_waited"
file_handling_check_equality_of_file_lengths $logfile \

View File

@ -92,8 +92,8 @@ function marsadm_check_post_condition_role_switch
if [ "$role_act" != "$role_req" ]; then
lib_exit 1 "role expected = $role_req != $role_act = role found"
fi
lib_vmsg " role = $role_act, trying ls $(resource_get_name_data_device $res) on $host"
lib_remote_idfile $host "ls -l --full-time $(resource_get_name_data_device $res)"
lib_vmsg " role = $role_act, trying ls $(resource_get_data_device $res) on $host"
lib_remote_idfile $host "ls -l --full-time $(resource_get_data_device $res)"
rc=$?
case $role_act in # (((
primary) if [ $rc -ne 0 ]; then
@ -180,13 +180,15 @@ function marsadm_set_proc_sys_mars_parameter
lib_remote_idfile $host "echo $param_value >$dir/$param" || lib_exit 1
}
function marsadm_check_warn_file_and_disk_state
function marsadm_check_warnings_and_disk_state
{
local host=$1 res=$2 situation="$3"
# wait a little for the files required
sleep 2
case $situation in # ((
apply_stopped_after_disconnect)
local link_value not_applied restlen_in_warn_file
local warn_file="${resource_dir_list[$res]}/2.warn.status"
local warn_file="$cluster_debugfile"
local link=$(lib_linktree_get_res_host_linkname $host $res "replay")
link_value=$(lib_remote_idfile $host "readlink $link") || lib_exit 1

View File

@ -33,17 +33,30 @@ function mount_mount
{
[ $# -eq 4 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local host=$1 dev="$2" mount_point=$3 fs_type=$4
lib_vmsg " mounting dev $dev on $mount_point on $host"
lib_vmsg " mounting dev $dev (type $fs_type) on $mount_point on $host"
lib_remote_idfile $host mount -t $fs_type $dev $mount_point
return $?
}
function mount_is_device_mounted
{
local host=$1 dev=$2 rc
lib_vmsg " checking whether $dev is mounted on $host"
lib_remote_idfile $host "mount | grep '^$dev on'"
[ $# -eq 3 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local host=$1 dev=$2 varname_mountpoint=$3 rc
local mount_out devname_in_mounttab
# in the mount table the device appears under the device mapper name
# /dev/mapper/...
lib_vmsg " checking whether $host:$dev is 'device mapped'"
devname_in_mounttab=$(lv_config_get_dm_dev $host $dev)
rc=$?
if [ $rc -ne 0 ]; then
devname_in_mounttab=$dev
fi
lib_vmsg " checking whether $devname_in_mounttab is mounted on $host"
mount_out=($(lib_remote_idfile $host "mount | grep '^$devname_in_mounttab on'"))
rc=$?
if [ $rc -eq 0 ]; then
eval $varname_mountpoint=${mount_out[2]}
fi
return $rc
}
@ -61,20 +74,37 @@ function mount_mount_data_device
local res_no=${1:-0}
local res=${resource_name_list[$res_no]}
local host=${main_host_list[0]}
local dev=$(resource_get_name_data_device $res)
local dev=$(resource_get_data_device $res)
local mount_point=${resource_mount_point_list[$res]}
lib_remote_check_device_fs $host $dev ${resource_fs_type_list[$res]}
lib_rw_mount_data_device $host $dev $mount_point
}
function mount_umount_data_device
function mount_umount_data_device_all
{
local res_no=${1:-0}
local host
local res=${resource_name_list[$res_no]}
local host=${main_host_list[0]}
local dev=$(resource_get_name_data_device $res)
local dev=$(resource_get_data_device $res)
local mount_point=${resource_mount_point_list[$res]}
if mount_is_dir_mountpoint $host $mount_point; then
mount_umount $host $dev $mount_point
fi
for host in ${main_host_list[@]}; do
if mount_is_dir_mountpoint $host $mount_point; then
local maxwait=5 waited=0 rc
while true;do
mount_umount $host $dev $mount_point
rc=$?
if [ $rc -eq 0 ];then
break
fi
sleep 1
let waited+=1
lib_vmsg " waited $waited for unmounting $host:$mount_point"
if [ $waited -eq $maxwait ]; then
lib_exit 1 "maxwait $maxwait exceeded"
fi
done
fi
done
}

View File

@ -116,6 +116,17 @@ check_installed "$check_always_list"
#####################################################################
# miscellanous
## rounds to GiB
function lib_round_to_gb
{
    # Print $1 (a number of bytes) rounded to the nearest GiB; exact halves
    # are rounded up.
    local number=$1
    local gib=$((1024 * 1024 * 1024))
    local half_gib=$((512 * 1024 * 1024))
    echo $(( ($number + half_gib) / gib ))
}
#####################################################################
# helper for sourcing other config files (may reside in parents of cwd)
function source_config
@ -247,28 +258,27 @@ function lib_remote_check_device_fs
{
[ $# -eq 3 ] || lib_exit 1 "wrong number $# of arguments (args = $*)"
local host=$1 dev=$2 fs_type=$3
lib_vmsg " checking device $dev on host $host"
lib_remote_idfile $host "lvdisplay -C --noheadings -o lv_name $dev" \
|| lib_exit 1
lib_vmsg " checking whether $host:$dev contains a filesystem"
blkid_out=$(lib_remote_idfile $host "blkid -p -u filesystem $dev")
local tmp_dir=/mnt/mars_tmp_mountpoint
lib_vmsg " checking existence of directory $host:$tmp_dir"
lib_remote_idfile $host "if test ! -d $tmp_dir; then mkdir $tmp_dir;fi" \
|| lib_exit 1
lib_vmsg " checking whether $host:$dev is mountable as $fs_type filesystem on $tmp_dir"
lib_remote_idfile $host mount -t $fs_type $dev $tmp_dir
rc=$?
if [ $rc -eq 0 ]; then
if ! expr "$blkid_out" : '.*\(TYPE="'"$fs_type"'"\)'; then
rc=1
fi
mount_umount $host $dev $tmp_dir || lib_exit 1
return
fi
if [ $rc -ne 0 ]; then
if mount_is_dir_mountpoint $host $main_mars_directory; then
mount_umount $host "device_does_not_matter" $main_mars_directory
fi
lib_vmsg " creating $fs_type filesystem on $dev"
lib_remote_idfile $host "mkfs.$fs_type ${lv_config_mkfs_option_list[$fs_type]} $dev" || lib_exit 1
if [ -n "${lv_config_fs_type_tune_cmd_list[$fs_type]}" ];then
local cmd=${lv_config_fs_type_tune_cmd_list[$fs_type]/<dev>/$dev}
lib_vmsg " tuning $dev on $host: $cmd"
lib_remote_idfile $host "$cmd" || lib_exit 1
fi
local mount_point
if mount_is_device_mounted $host $dev "mount_point"; then
mount_umount $host $dev $mount_point
fi
lib_vmsg " creating $fs_type filesystem on $dev"
lib_remote_idfile $host "mkfs.$fs_type ${lv_config_mkfs_option_list[$fs_type]} $dev" || lib_exit 1
if [ -n "${lv_config_fs_type_tune_cmd_list[$fs_type]}" ];then
local cmd=${lv_config_fs_type_tune_cmd_list[$fs_type]/<dev>/$dev}
lib_vmsg " tuning $dev on $host: $cmd"
lib_remote_idfile $host "$cmd" || lib_exit 1
fi
}

View File

@ -158,7 +158,7 @@ while (( resume )); do
eval "$to_start" || { echo "Replay failure $?"; exit -1; }
fi
echo "==> $(date) Finished."
) || { echo "Failure $?"; exit -1; }
) || { rc=$?; echo "Failure $rc"; exit -1; }
echo "==============================================================="
echo ""
# (( resume++ ))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,44 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
## Parameters for the remote_dev test
verbose_script=1
## sleep time between marsadm log-rotate commands
remote_dev_log_rotate_sleep=5
## sleep time between marsadm log-delete commands
remote_dev_log_delete_sleep=20
## name of a file which does not exist
remote_dev_non_existant_file='/tmp/i_am_not_there'
## egrep pattern identifying the error messages produced by the attempt to
## access the remote file which does not exist: a line containing
## $main_mars_errmsg_prefix and ending either with a part mentioning the file
## or with "status = -2"
remote_dev_errmsg_pattern=' '"$main_mars_errmsg_prefix"' .*( file .*'"$remote_dev_non_existant_file"'.*|status = -2 *)$'
## number of error messages to wait for
remote_dev_number_errmsg_req=100
## maximum time to wait for the requested number of error messages
remote_dev_maxtime_to_wait_for_errmsg=600
## see comment in default-crash.conf
crash_print_linktree_during_reboot=0
## functions executed (in this order) by the test
run_list="resource_prepare resource_run remote_dev_run lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,30 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
## Parameters for the memleak test
verbose_script=1
## the test works on this single resource
resource_name_list=(lv-1-2)
## NOTE(review): presumably recomputes the globals depending on
## resource_name_list after it was changed above - confirm against
## resource_set_globals
resource_set_globals
resource_recreate_fs_on_data_device_required=0
## functions executed (in this order) by the test
run_list="resource_prepare memleak_run lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,30 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
## Parameters for the memleak test
verbose_script=1
## the test works on this single resource
resource_name_list=(lv-1-2)
## NOTE(review): presumably recomputes the globals depending on
## resource_name_list after it was changed above - confirm against
## resource_set_globals
resource_set_globals
resource_recreate_fs_on_data_device_required=0
## functions executed (in this order) by the test
run_list="resource_prepare memleak_run lib_err_check_and_move_global_err_files_all"

View File

@ -1 +0,0 @@
../aio_filehandle.conf

View File

@ -23,6 +23,11 @@ verbose_script=1
resource_recreate_fs_on_data_device_required=0
## maxtime to wait for secondary to become disk state = Uptodate and
## repl state = Replaying
mars_dir_full_maxtime_state_constant=60
cluster_mars_dir_lv_name_list=([${main_host_list[0]}]=lv-5-10 [${main_host_list[1]}]=lv-6-100)
run_list="cluster_run resource_run resource_fill_data_device lib_err_check_and_move_global_err_files_all"
run_list="cluster_run resource_run resource_fill_mars_dir lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,39 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
# All logical volume names: lv-1-<size> ... lv-<count>-<size>, with
# <count> = $multires_lv_count and <size> = $multires_lv_size
# (e.g. lv-1-700 ... lv-8-700).
# The eval is required because bash performs brace expansion before variable
# expansion, so {1..$multires_lv_count} only expands on the second parse.
lv_config_name_list=($(eval echo lv-{1..$multires_lv_count}-$multires_lv_size))
# Resources are the entries 0 .. <count>-2 of lv_config_name_list, i.e. all
# logical volumes except the last one (e.g. lv-1-700 ... lv-7-700).
resource_name_list=($(eval echo '"${lv_config_name_list["'{0..$(($multires_lv_count - 2))}'"]}"'))
# NOTE(review): presumably re-derives the globals that depend on
# resource_name_list - confirm in the resource module.
resource_set_globals
marsadm_timeout=10
# Assign the last logical volume (lv-<count>-<size>, e.g. lv-8-700) to every
# host of main_host_list: the command substitution prints one
# "[<host>]=lv-<count>-<size>" word per host (prefix "[" and suffix "]=..." are
# added via pattern substitution) and eval turns the result into the array
# assignment.
# NOTE(review): the [<host>]= subscripts assume cluster_mars_dir_lv_name_list
# has been declared as associative array before this file is sourced - confirm.
eval cluster_mars_dir_lv_name_list=($(x=(${main_host_list[@]/#/[}); y=(${x[@]/%/]=lv-$multires_lv_count-$multires_lv_size}); echo ${y[@]}))
verbose_script=1
resource_recreate_fs_on_data_device_required=0
# Names of the steps belonging to this test.
run_list="multires_prepare multires_run"

View File

@ -0,0 +1,39 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
# All logical volume names: lv-1-<size> ... lv-<count>-<size>, with
# <count> = $multires_lv_count and <size> = $multires_lv_size
# (e.g. lv-1-700 ... lv-8-700).
# The eval is required because bash performs brace expansion before variable
# expansion, so {1..$multires_lv_count} only expands on the second parse.
lv_config_name_list=($(eval echo lv-{1..$multires_lv_count}-$multires_lv_size))
# Resources are the entries 0 .. <count>-2 of lv_config_name_list, i.e. all
# logical volumes except the last one (e.g. lv-1-700 ... lv-7-700).
resource_name_list=($(eval echo '"${lv_config_name_list["'{0..$(($multires_lv_count - 2))}'"]}"'))
# NOTE(review): presumably re-derives the globals that depend on
# resource_name_list - confirm in the resource module.
resource_set_globals
marsadm_timeout=10
# Assign the last logical volume (lv-<count>-<size>, e.g. lv-8-700) to every
# host of main_host_list: the command substitution prints one
# "[<host>]=lv-<count>-<size>" word per host (prefix "[" and suffix "]=..." are
# added via pattern substitution) and eval turns the result into the array
# assignment.
# NOTE(review): the [<host>]= subscripts assume cluster_mars_dir_lv_name_list
# has been declared as associative array before this file is sourced - confirm.
eval cluster_mars_dir_lv_name_list=($(x=(${main_host_list[@]/#/[}); y=(${x[@]/%/]=lv-$multires_lv_count-$multires_lv_size}); echo ${y[@]}))
verbose_script=1
resource_recreate_fs_on_data_device_required=0
# Names of the steps belonging to this test.
run_list="multires_prepare multires_run"

View File

@ -1,22 +0,0 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
verbose_script=1
run_list="resource_prepare resource_run synctest_run lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
verbose_script=1
perftest_action='apply'
## time for which the amount of data to apply must be constant to declare
## the apply process as having stopped
perftest_time_constant_apply=10
## maxtime to wait for apply to stop (after pause-apply)
perftest_maxtime_apply=300
resource_recreate_fs_on_data_device_required=0
run_list="perftest_run lib_err_check_and_move_global_err_files_all"

View File

@ -21,5 +21,5 @@
verbose_script=1
synctest_parallel_writer=0
perftest_parallel_writer=0

View File

@ -20,6 +20,6 @@
#####################################################################
verbose_script=1
synctest_parallel_writer=1
run_list="synctest_run lib_err_check_and_move_global_err_files_all"
perftest_parallel_writer=1
resource_recreate_fs_on_data_device_required=1

View File

@ -0,0 +1,39 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
verbose_script=1
perftest_action='write'
## hosts the tests are running on. The first host is initially always used as
## primary host
main_host_list=("istore-test-bap7" "istore-test-bs7")
## time to run the data device writer to determine write rate
perftest_write_time=60
perftest_result_type="loops_per_min"
perftest_division_mars_device_data_device=1
lib_rw_part_of_device_size_written_per_loop=8
resource_recreate_fs_on_data_device_required=1
run_list="perftest_run lib_err_check_and_move_global_err_files_all"

View File

@ -21,6 +21,5 @@
verbose_script=1
synctest_fast_sync=0
perftest_device_division="same_controller"
run_list="synctest_run lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,49 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
verbose_script=1
## Name of the logical volume located on the raid controller with bbu cache.
## This device must use physical volumes on a raid controller with the
## following properties:
##   - different from the controller of the data device mount_mount_data_device
##   - bbu cache can be switched on and off
## On the primary host an 18G lv can be created by:
##   - pvcreate /dev/<dev-name>
##   - vgextend <vg-name> /dev/<dev-name>
##   - lvcreate -n lv-7-18 -L 18G <vg-name> /dev/<dev-name>
perftest_lv_on_bbu_controller="lv-7-18"
## Device on the bbu controller, keyed by host name.
## NOTE(review): the [<host>]=<value> form only keeps the host name as key if
## the array has been declared associative (declare -g -A) before this file is
## sourced - confirm. The host name is hard coded here while the assignment
## below uses ${main_host_list[0]}; verify that both refer to the same host.
perftest_dev_on_bbu_controller_list=(\
    [istore-test-bap7]=/dev/cciss/c0d0p7\
                                    )
## The first host of main_host_list gets the lv on the bbu controller as its
## entry in cluster_mars_dir_lv_name_list.
cluster_mars_dir_lv_name_list["${main_host_list[0]}"]="$perftest_lv_on_bbu_controller"
## Controller commands belonging to the "disable" case of the bbu cache.
## NOTE(review): where and on which host these lists are executed is not
## visible in this file - confirm in the perftest module.
perftest_bbu_disable_cmd_list=(\
    "hpacucli ctrl slot=0 modify nobatterywritecache=disable"\
                              )
## Controller commands belonging to the "enable" case of the bbu cache.
perftest_bbu_enable_cmd_list=(\
    "hpacucli ctrl slot=0 modify nobatterywritecache=enable"\
    "hpacucli ctrl slot=0 modify cacheratio=0/100"\
                             )
## Controller command(s) to display the current controller settings.
perftest_bbu_show_cmd_list=("hpacucli ctrl slot=0 show")

View File

@ -21,6 +21,5 @@
verbose_script=1
synctest_fast_sync=1
perftest_device_division="separated_and_mars_dev_with_bbu_cache"
run_list="synctest_run lib_err_check_and_move_global_err_files_all"

View File

@ -21,7 +21,5 @@
verbose_script=1
synctest_use_mars_sync=0
run_list="synctest_run lib_err_check_and_move_global_err_files_all"
perftest_device_division="separated_and_mars_dev_without_bbu_cache"

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
verbose_script=1
perftest_action='fetch'
## time for which the amount of data to apply must be constant to declare
## the apply process as having stopped
perftest_time_constant_apply=10
## maxtime to wait for apply to stop (after pause-apply)
perftest_maxtime_apply=300
resource_recreate_fs_on_data_device_required=0
run_list="perftest_run lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,25 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
verbose_script=1
perftest_parallel_writer=0

View File

@ -0,0 +1,25 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
verbose_script=1
perftest_parallel_writer=1
resource_recreate_fs_on_data_device_required=1

View File

@ -0,0 +1,44 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
verbose_script=1
perftest_action='fetch_and_apply'
## amount of data in GB to be written to the data device
perftest_data_in_gb_to_write=5
## size of logfile (in GiB and rounded) after perftest_data_in_gb_to_write are written
perftest_logfile_size_in_gb=5
## apply should be much faster than fetch, so we wait only 2 seconds for apply to stop
perftest_wait_for_apply_to_stop_after_fetch_end=2
## time for which the amount of data to apply must be constant to declare
## the apply process as having stopped
perftest_time_constant_apply=10
## maxtime to wait for apply to stop (after pause-apply)
perftest_maxtime_apply=300
resource_recreate_fs_on_data_device_required=0
run_list="perftest_run lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,25 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
verbose_script=1
perftest_parallel_writer=0

View File

@ -0,0 +1,25 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#####################################################################
verbose_script=1
perftest_parallel_writer=1
resource_recreate_fs_on_data_device_required=1

View File

@ -21,4 +21,4 @@
verbose_script=1
synctest_fast_sync=1
perftest_sync_mode="fast_sync"

View File

@ -21,5 +21,5 @@
verbose_script=1
synctest_fast_sync=0
perftest_sync_mode="no_fast_sync"

View File

@ -21,5 +21,5 @@
verbose_script=1
synctest_parallel_writer=0
perftest_parallel_writer=0

View File

@ -20,4 +20,6 @@
#####################################################################
verbose_script=1
synctest_parallel_writer=1
perftest_parallel_writer=1
resource_recreate_fs_on_data_device_required=1

View File

@ -21,6 +21,5 @@
verbose_script=1
synctest_use_mars_sync=0
perftest_sync_mode="rsync"

View File

@ -19,4 +19,22 @@
verbose_script=1
run_list="resource_prepare resource_run synctest_run lib_err_check_and_move_global_err_files_all"
perftest_action='sync'
## number of patches the data device will be patched with
perftest_number_of_patches_list=(10 10 4 2)
## list of lengths of the patch (in KB) the data device will be patched with
## must correspond to perftest_number_of_patches_list
perftest_patch_length_list=(1 50000 500000 1000000)
## proc file to switch sync mode
perftest_sync_mode_proc_file='/proc/sys/mars/do_fast_fullsync'
## data file to use for rsync
perftest_data_file=$main_mars_directory/file_to_sync
resource_recreate_fs_on_data_device_required=0
run_list="resource_prepare resource_run perftest_run lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,31 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
verbose_script=1
perftest_action='write'
## time to run the data device writer to determine write rate
perftest_write_time=30
perftest_result_type="loops_per_min"
resource_recreate_fs_on_data_device_required=1
run_list="perftest_run lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,31 @@
#!/bin/bash
# Copyright 2010-2013 Frank Liepold / 1&1 Internet AG
#
# Email: frank.liepold@1und1.de
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
verbose_script=1
perftest_action='write'
## time to run the data device writer to determine write rate
perftest_write_time=30
perftest_result_type="loops_per_min"
resource_recreate_fs_on_data_device_required=1
run_list="perftest_run lib_err_check_and_move_global_err_files_all"

View File

@ -0,0 +1,35 @@
read performance
/tests/small_file/low_load
/tests/small_file/high_load
/tests/big_file/low_load
/tests/big_file/high_load
/modules/build_env.sh
function create_big_file
/default-build_env.conf
testfile=/tmp/bigfile
/modules/read_perf.sh
function run_readperf
function ...
/default-read_perf.conf
filesize=20
no_threads=5
/tests/small_file.conf
filesize=10
/tests/big_file.conf
filesize=10000
/tests/low_load.conf
no_threads=40
run_list="run_readperf"
/tests/big_file/high_load/small_buf
/tests/small_buf.conf
bufsize=4

View File

@ -188,3 +188,25 @@ hardcore H1 gap_in_log create and repair H1.1 - pause-apply on secondar
hardcore H3 late_log_comp belatedly completed H3.1 - pause-apply on secondary
logfile after new
logfile has already
---------------------------------------------------------------------------------------------------------------------
perf P2 perf_general fetch only P2.1 - pause-replay - time + rate
apply only P2.2 - disconnect "
sync P2.3 - invalidate "
fetch & apply P2.4 - = normal mode - time + rate
- time + rate
per action
write P2.5 - writing data device - write rate
P2.6 equal to P2.1 to P2.4 as P2.1 to P2.4
... with parallel writing write rate
P2.9
with n devices P2.1 to P2.9 with
2 4 8 devices

View File

@ -0,0 +1,194 @@
# test_suite.txt.official Version 0.01
#
# description of the tests to execute before a mars release
#
# author: Frank Liepold frank.liepold@1und1.de
#
# Can be printed with a2ps -R --rows=1 --columns=1 -l 130 -L101 test_suite.txt
abbreviations:
data_dev_writer : process writing to data device on primary and producing a protocol containing statistics
about runtime and written data.
device cksum : checking that cksum primary = cksum secondary
recovery procedures : Some testcases cause more or less serious crashes or standstills (e.g. A4.1 below). If
there are documented repair strategies they will be tested, too.
category id Prio testname description testcase and steps to check
done(%)
=========================================================================================================================
basic B 3 marsadm testing of pre and the scope of tests is specified by the
50% post conditions of documents resource_states.txt and
all marsadm cmds states_and_actions.txt and comprises at the
moment about 20 tests of the most important
marsadm commands by checking their pre and
post conditions
-------------------------------------------------------------------------------------------------------------------------
basic B1 2 wait_role marsadm secondary B1.1 - marsadm secondary marsadm ROLE must
100% resp. primary - marsadm ROLE return secondary
may only return - ls /dev/mars/... ls must fail
with success after
the device has B1.2 - marsadm primary marsadm ROLE must
disappeared resp. - marsadm ROLE return primary
appeared - ls /dev/mars/... ls must succeed
-------------------------------------------------------------------------------------------------------------------------
admin A1 1 growing growing the data A1.1 - start data_dev_writer - device cksum
100% device in a running - lvresize on primary and secondary
mars connection - pause-sync on primary and secondary
- marsadm resize on primary
- resume-sync on primary and secondary
- wait for sync end
- stop data_dev_writer
- wait for fetch and apply end
------------------------------------------------------------------------------------------------------------------------
admin A2 2 secon2prima host a: primary A2.1 - start data_dev_writer - device cksum
100% host b: secondary - marsadm primary on host b (must fail)
switch secondary -> - stop data_dev_writer
primary on host b - umount data device
- marsadm primary on host b
------------------------------------------------------------------------------------------------------------------------
admin A3 2 apply_fetch independence of apply A3.1 - start data_dev_writer apply must run to
100% and fetch - pause-apply on secondary (nearly) end of
- pause-fetch on secondary fetched logfile
- resume-apply on secondary
A3.2 - start data_dev_writer the whole logfile
- pause-apply on secondary must be fetched
- pause-fetch on secondary
- stop data_dev_writer
- resume-fetch on secondary
-----------------------------------------------------------------------------------------------------------------------
hardcore H2 2 mars_dir_full /mars full H2.1 running full because of logfiles device cksum
100% is regarded as an generated by data_dev_writer
admin error - start data_dev_writer
until /mars full
- rmmod mars on all cluster hosts
- resize /mars
- modprobe mars on all cluster hosts
- start second data_dev_writer
- stop all data_dev_writers
H2.2 running full because another process
is filling /mars
-----------------------------------------------------------------------------------------------------------------------
admin A5 3 datadev_full data device full A5.1 - start data_dev_writer device cksum
100% - wait for data device full
see A1.1
-----------------------------------------------------------------------------------------------------------------------
admin A6 2 logrotate looping logrotate A6.1 - start data_dev_writer - device cksum
100% - endless loop logrotate - impact of logfile
- stop loop after n minutes sizes on write
- stop data_dev_writer performance
- wait for fetch and apply end - impact of
logrotate
frequency on write
performance
-----------------------------------------------------------------------------------------------------------------------
admin A7 2 logdelete looping logrotate A7.1 - start data_dev_writer see A6.1
100% and logdelete - endless loop logrotate
and logdelete
- stop loop after n minutes
- stop data_dev_writer
- wait for fetch and apply end
-----------------------------------------------------------------------------------------------------------------------
admin A8 3 compatibel compatibility of these testcases are to be implemented, when
0% mars versions there are different versions in production
userspace versions
kernel versions
-----------------------------------------------------------------------------------------------------------------------
admin A9 3 standstill recognizing, The most important part is done by marsview
80% indicating and
repair of A9.1 - logfile damage on secondary - error indicator
exceptional (still to specify)
standstills - repair (if
automatable)
- device cksum
-----------------------------------------------------------------------------------------------------------------------
admin A10 3 mult_device multiple data A10.* run several tests parallel - given by the single
0% devices (resources) on multiple mars connections where tests
per host the data devices are in some cases - impact on write
located on the same host performance
still to specify
A10.1 - for i in 1 2 3; do
start data_dev_writer on $i resources
stop data_dev_writer on resources
take write rate of each resource
A10.2 - like A10.1 but with regular log-rotate
and log-delete
------------------------------------------------------------------------------------------------------------------------
admin A11 3 small_sec_dev secondary data At the moment the two devices must have the
0% device smaller at cmd same size
marsadm join-resource
see mail uli 06/23/13 A11.1 - primary create resource (100 MB) - device cksum
- secondary join resource (80 MB)
- start data_dev_writer
- stop data_dev_writer
- switch primary -> secondary
- wait for fetch and apply end
on secondary
-----------------------------------------------------------------------------------------------------------------------
admin A12 3 casc_resize cascades of resize A12.1 to specify amount of synced
0% operations data
-----------------------------------------------------------------------------------------------------------------------
admin A13 3 sync_pos testing new symlink A13.1 to specify
0% syncpos
------------------------------------------------------------------------------------------------------------------------
admin A14 3 filesys all tests on different
0% filesystems
-----------------------------------------------------------------------------------------------------------------------
perf P1 1 fullsync performance P1.1 - data on both data devices nearly - device cksum
100% matching (= secondary data device - sync time
patched with some bytes at some - transfer rate
offsets)
- default mars sync (fast fullsync)
- 2 GB data device
- secondary down
- secondary invalidate
- secondary up
- wait for sync end
P1.2 - similar to P1.1 but:
- "slow" mars sync
P1.3 - similar to P1.1 but:
- data on both with strong differences
P1.4 - similar to P1.1 but:
- data on both with strong differences
- "slow" mars sync
P1.11 equal to P1.1 - P1.4 but with - see P1.1
... data_dev_writer - impact of sync
P1.14 on write
performance
------------------------------------------------------------------------------------------------------------------------
stabil S1 2 net_failure network broken S1.1 - start data_dev_writer - device cksum
S1.1: 100% - manipulation=total cut of connection - impact on write
S1.2ff: 0% - restore network connection performance
- stop data_dev_writer
S1.2ff similar to S1.1 but with different
network connection manipulations
still to specify
------------------------------------------------------------------------------------------------------------------------
stabil S2 1 crash_prim reboot of primary S2.1 - start data_dev_writer device cksum
100% while writing - reboot primary (ipmitool)
------------------------------------------------------------------------------------------------------------------------
stabil S3 3 crash_sec reboot of secondary S3.1 - start data_dev_writer device cksum
0% while applying and - reboot secondary
fetching
------------------------------------------------------------------------------------------------------------------------
hardcore H1 3 gap_in_log create and repair H1.1 - pause-apply on secondary
H1.1: 100% gap in logfile - start data_dev_writer
H1.2: 0% - stop data_dev_writer after n minutes
- wait until fetch complete
- create gap at the end of last logfile
- resume-apply
- wait until apply stops apply must stop
at gap
- repair gap (apply must continue) device cksum
H1.2 - similar to H1.1 but gap in the middle
of the logfiles
-------------------------------------------------------------------------------------------------------------------------
hardcore H3 3 late_log_comp belatedly completed H3.1 to specify
0% logfile after new
logfile has already
arrived