test: add timer loop tests

We've been experiencing timer hangs with mingw-llvm.
std::condition_variable::wait_for was returning a few microseconds
before the requested time and then hanging when called with a
small interval (e.g. microseconds).

This was affecting the OSD periodic tick, which would hang after
a while (20m up to 2h).

The issue can be reproduced with a timer loop and a small interval
(e.g. 40us), in which case the timer is likely to hang after about
10s.

We're adding some tests, while the actual mingw-llvm issue will
be mitigated in a separate commit.

Signed-off-by: Lucian Petrut <lpetrut@cloudbasesolutions.com>
This commit is contained in:
Lucian Petrut 2023-05-04 13:16:56 +00:00
parent 3ce1e4a5c6
commit 09a8cb0a6a
3 changed files with 135 additions and 1 deletions

View File

@ -244,6 +244,72 @@ static int safe_timer_cancellation_test(SafeTimer &safe_timer,
return ret;
}
class TestLoopContext : public Context
{
public:
explicit TestLoopContext(SafeTimer &_t,
ceph::mono_clock::time_point _deadline,
double _interval,
bool& _test_finished)
: t(_t)
, deadline(_deadline)
, interval(_interval)
, test_finished(_test_finished)
{
}
~TestLoopContext() override
{
}
void finish(int r) override
{
if (ceph::mono_clock::now() > deadline) {
test_finished = true;
} else {
// We have to create a new context.
TestLoopContext* new_ctx = new TestLoopContext(
t,
deadline,
interval,
test_finished);
t.add_event_after(ceph::make_timespan(interval), new_ctx);
}
}
protected:
SafeTimer &t;
ceph::mono_clock::time_point deadline;
double interval;
bool& test_finished;
};
static int safe_timer_loop_test(SafeTimer &safe_timer,
ceph::mutex& safe_timer_lock) {
// TODO: consider using gtest.
cout << __PRETTY_FUNCTION__ << std::endl;
bool test_finished = false;
int test_duration = 10;
double tick_interval = 0.00004;
auto deadline = ceph::mono_clock::now() +
std::chrono::seconds(test_duration);
TestLoopContext* ctx = new TestLoopContext(
safe_timer,
deadline,
tick_interval,
test_finished);
safe_timer.add_event_after(ceph::make_timespan(tick_interval), ctx);
std::this_thread::sleep_for(std::chrono::seconds(test_duration + 2));
if (!test_finished) {
std::cerr << "The timer loop job didn't complete, it probably hung."
<< std::endl;
return -1;
}
}
int main(int argc, const char **argv)
{
auto args = argv_to_vec(argc, argv);
@ -274,6 +340,10 @@ int main(int argc, const char **argv)
if (ret)
goto done;
ret = safe_timer_loop_test(safe_timer, safe_timer_lock);
if (ret)
goto done;
done:
safe_timer.shutdown();
print_status(argv[0], ret);

View File

@ -372,7 +372,7 @@ add_ceph_unittest(unittest_cdc)
add_executable(unittest_ceph_timer test_ceph_timer.cc)
add_ceph_unittest(unittest_ceph_timer)
target_link_libraries(unittest_ceph_timer ceph-common)
target_link_libraries(unittest_ceph_timer global ceph-common)
add_executable(unittest_option test_option.cc)
target_link_libraries(unittest_option ceph-common GTest::Main)

View File

@ -18,6 +18,7 @@
#include <gtest/gtest.h>
#include "common/ceph_time.h"
#include "common/ceph_timer.h"
using namespace std::literals;
@ -133,6 +134,30 @@ void cancellation()
EXPECT_FALSE(timer.cancel_event(e));
}
}
template<typename TC>
void tick(ceph::timer<TC>* t,
typename TC::time_point deadline,
double interval,
bool* test_finished,
typename TC::time_point* last_tick,
uint64_t* tick_count,
typename TC::time_point* last_tp,
typename TC::time_point* second_last_tp) {
*last_tick = TC::now();
*tick_count += 1;
if (TC::now() > deadline) {
*test_finished = true;
} else {
auto tp = TC::now() + ceph::make_timespan(interval);
*second_last_tp = *last_tp;
*last_tp = tp;
t->reschedule_me(tp);
}
}
}
TEST(RunSome, Steady)
@ -161,3 +186,42 @@ TEST(CancelAll, Wall)
{
cancel_all<std::chrono::system_clock>();
}
TEST(TimerLoopTest, TimerLoop)
{
using TC = ceph::coarse_mono_clock;
ceph::timer<TC> t;
bool test_finished = false;
int test_duration = 10;
double tick_interval = 0.00004;
auto last_tick = TC::now();
uint64_t tick_count = 0;
auto last_tp = TC::now();
auto second_last_tp = TC::now();
TC::time_point test_deadline = TC::now() +
std::chrono::seconds(test_duration);
t.add_event(
ceph::make_timespan(tick_interval),
&tick<TC>,
&t,
test_deadline,
tick_interval,
&test_finished,
&last_tick,
&tick_count,
&last_tp,
&second_last_tp);
std::this_thread::sleep_for(std::chrono::seconds(test_duration + 2));
ASSERT_TRUE(test_finished)
<< "The timer job didn't complete, it probably hung. "
<< "Time since last tick: "
<< (TC::now() - last_tick)
<< ". Tick count: " << tick_count
<< ". Last wait tp: " << last_tp
<< ", second last tp: " << second_last_tp
<< ", deadline tp: " << test_deadline;
}