MINOR: stream: remove the cpu time detection from process_stream()

It was not as efficient as the watchdog in that it would only trigger
after the problem resolved by itself, and still required a huge margin
to make sure we didn't trigger for an invalid reason. This used to leave
little indication about the cause. Better use the watchdog now and
improve it if needed.

The detector of unkillable tasks remains active though.
This commit is contained in:
Willy Tarreau 2019-05-22 08:57:01 +02:00
parent 2bfefdbaef
commit b106ce1c3d
1 changed files with 5 additions and 50 deletions

View File

@ -1751,7 +1751,6 @@ struct task *process_stream(struct task *t, void *context, unsigned short state)
struct channel *req, *res;
struct stream_interface *si_f, *si_b;
unsigned int rate;
uint64_t cpu_time_before, cpu_time_after;
activity[tid].stream++;
@ -1765,11 +1764,6 @@ struct task *process_stream(struct task *t, void *context, unsigned short state)
si_sync_recv(si_f);
si_sync_recv(si_b);
/* measure only the analysers' time */
cpu_time_before = 0;
if (task_profiling_mask & tid_bit)
cpu_time_before = now_cpu_time();
redo:
rate = update_freq_ctr(&s->call_rate, 1);
if (rate >= 100000 && s->call_rate.prev_ctr) { // make sure to wait at least a full second
@ -2644,20 +2638,6 @@ struct task *process_stream(struct task *t, void *context, unsigned short state)
s->pending_events &= ~(TASK_WOKEN_TIMER | TASK_WOKEN_RES);
stream_release_buffers(s);
/* We may have free'd some space in buffers, or have more to send/recv, try again */
if (cpu_time_before) {
cpu_time_after = now_cpu_time();
cpu_time_after -= cpu_time_before;
if (cpu_time_after >= 100000000U) {
/* more than 1 ms CPU time spent in analysers is already absolutely
* insane, but on tiny machines where performance doesn't matter it
* may happen when using tons of regex, so let's fix the threshold
* to 100ms which must never ever be hit.
*/
stream_dump_and_crash(&s->obj_type, -(cpu_time_after / 1000));
}
}
return t; /* nothing more to do */
}
@ -2705,19 +2685,6 @@ struct task *process_stream(struct task *t, void *context, unsigned short state)
/* update time stats for this stream */
stream_update_time_stats(s);
if (cpu_time_before) {
cpu_time_after = now_cpu_time();
cpu_time_after -= cpu_time_before;
if (cpu_time_after >= 100000000U) {
/* more than 1 ms CPU time spent in analysers is already absolutely
* insane, but on tiny machines where performance doesn't matter it
* may happen when using tons of regex, so let's fix the threshold
* to 100ms which must never ever be hit.
*/
stream_dump_and_crash(&s->obj_type, -(cpu_time_after / 1000));
}
}
/* the task MUST not be in the run queue anymore */
stream_free(s);
task_destroy(t);
@ -2932,13 +2899,11 @@ void stream_dump(struct buffer *buf, const struct stream *s, const char *pfx, ch
}
/* dumps an error message for type <type> at ptr <ptr> related to stream <s>,
* having reached loop rate <rate>, then aborts hoping to retrieve a core. If
* <rate> is negative instead it reports a duration in microseconds.
* having reached loop rate <rate>, then aborts hoping to retrieve a core.
*/
void stream_dump_and_crash(enum obj_type *obj, int rate)
{
const struct stream *s;
const char *fmt;
char *msg = NULL;
const void *ptr;
@ -2953,22 +2918,12 @@ void stream_dump_and_crash(enum obj_type *obj, int rate)
return;
}
if (rate < 0) {
fmt = "A bogus %s [%p] halted the whole thread for at least %d microseconds "
"resulting in a complete freeze of all other tasks, aborting now! Please "
"report this error to developers "
"[%s]\n";
rate = -rate;
}
else {
fmt = "A bogus %s [%p] is spinning at %d calls per second and refuses to die, "
"aborting now! Please report this error to developers "
"[%s]\n";
}
chunk_reset(&trash);
stream_dump(&trash, s, "", ' ');
memprintf(&msg, fmt,
memprintf(&msg,
"A bogus %s [%p] is spinning at %d calls per second and refuses to die, "
"aborting now! Please report this error to developers "
"[%s]\n",
obj_type_name(obj), ptr, rate, trash.area);
ha_alert("%s", msg);