diff --git a/doc/filters.texi b/doc/filters.texi index 32720dba41..e002f25932 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -17192,6 +17192,11 @@ event in time domain is represented more accurately (such as fast bass drum), otherwise event in frequency domain is represented more accurately (such as bass guitar). Acceptable range is @code{[0.002, 1]}. Default value is @code{0.17}. +@item attack +Set attack time in seconds. The default is @code{0} (disabled). Otherwise, it +limits future samples by applying asymmetric windowing in time domain, useful +when low latency is required. Accepted range is @code{[0, 1]}. + @item basefreq Specify the transform base frequency. Default value is @code{20.01523126408007475}, which is frequency 50 cents below E0. Acceptable range is @code{[10, 100000]}. diff --git a/libavfilter/avf_showcqt.c b/libavfilter/avf_showcqt.c index ede56f4b1c..7bc3a260c3 100644 --- a/libavfilter/avf_showcqt.c +++ b/libavfilter/avf_showcqt.c @@ -78,6 +78,7 @@ static const AVOption showcqt_options[] = { { "bar_t", "set bar transparency", OFFSET(bar_t), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 1.0, FLAGS }, { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.002, 1.0, FLAGS }, { "tc", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.002, 1.0, FLAGS }, + { "attack", "set attack time", OFFSET(attack), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0.0, 1.0, FLAGS }, { "basefreq", "set base frequency", OFFSET(basefreq), AV_OPT_TYPE_DOUBLE, { .dbl = BASEFREQ }, 10.0, 100000.0, FLAGS }, { "endfreq", "set end frequency", OFFSET(endfreq), AV_OPT_TYPE_DOUBLE, { .dbl = ENDFREQ }, 10.0, 100000.0, FLAGS }, { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.1, 10.0, FLAGS }, @@ -152,6 +153,7 @@ static void common_uninit(ShowCQTContext *s) av_freep(&s->fft_data); av_freep(&s->fft_result); av_freep(&s->cqt_result); + av_freep(&s->attack_data); av_freep(&s->c_buf); av_freep(&s->h_buf); av_freep(&s->rcp_h_buf); @@ -1138,6 +1140,14 @@ static int plot_cqt(AVFilterContext *ctx, AVFrame **frameout) last_time = av_gettime(); memcpy(s->fft_result, s->fft_data, s->fft_len * sizeof(*s->fft_data)); + if (s->attack_data) { + int k; + for (k = 0; k < s->remaining_fill_max; k++) { + s->fft_result[s->fft_len/2+k].re *= s->attack_data[k]; + s->fft_result[s->fft_len/2+k].im *= s->attack_data[k]; + } + } + av_fft_permute(s->fft_ctx, s->fft_result); av_fft_calc(s->fft_ctx, s->fft_result); s->fft_result[s->fft_len] = s->fft_result[0]; @@ -1377,6 +1387,21 @@ static int config_output(AVFilterLink *outlink) if (!s->fft_ctx || !s->fft_data || !s->fft_result || !s->cqt_result) return AVERROR(ENOMEM); + s->remaining_fill_max = s->fft_len / 2; + if (s->attack > 0.0) { + int k; + + s->remaining_fill_max = FFMIN(s->remaining_fill_max, ceil(inlink->sample_rate * s->attack)); + s->attack_data = av_malloc_array(s->remaining_fill_max, sizeof(*s->attack_data)); + if (!s->attack_data) + return AVERROR(ENOMEM); + + for (k = 0; k < s->remaining_fill_max; k++) { + double y = M_PI * k / (inlink->sample_rate * s->attack); + s->attack_data[k] = 0.355768 + 0.487396 * cos(y) + 0.144232 * cos(2*y) + 0.012604 * cos(3*y); + } + } + s->cqt_align = 1; s->cqt_calc = cqt_calc; s->permute_coeffs = NULL; @@ -1435,7 +1460,7 @@ static int config_output(AVFilterLink *outlink) s->sono_count = 0; s->next_pts = 0; s->sono_idx = 0; - s->remaining_fill = s->fft_len / 2; + s->remaining_fill = s->remaining_fill_max; s->remaining_frac = 0; s->step_frac = av_div_q(av_make_q(inlink->sample_rate, s->count) , s->rate); s->step = (int)(s->step_frac.num / s->step_frac.den); @@ -1463,15 +1488,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) AVFrame *out = NULL; if (!insamples) { - while (s->remaining_fill < s->fft_len / 2) { - memset(&s->fft_data[s->fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill); + while (s->remaining_fill < s->remaining_fill_max) { + memset(&s->fft_data[s->fft_len/2 + s->remaining_fill_max - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill); ret = plot_cqt(ctx, &out); if (ret < 0) return ret; step = s->step + (s->step_frac.num + s->remaining_frac) / s->step_frac.den; s->remaining_frac = (s->step_frac.num + s->remaining_frac) % s->step_frac.den; - for (x = 0; x < (s->fft_len-step); x++) + for (x = 0; x < (s->fft_len/2 + s->remaining_fill_max - step); x++) s->fft_data[x] = s->fft_data[x+step]; s->remaining_fill += step; @@ -1486,7 +1511,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) while (remaining) { i = insamples->nb_samples - remaining; - j = s->fft_len - s->remaining_fill; + j = s->fft_len/2 + s->remaining_fill_max - s->remaining_fill; if (remaining >= s->remaining_fill) { for (m = 0; m < s->remaining_fill; m++) { s->fft_data[j+m].re = audio_data[2*(i+m)]; @@ -1500,7 +1525,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) remaining -= s->remaining_fill; if (out) { int64_t pts = av_rescale_q(insamples->pts, inlink->time_base, av_make_q(1, inlink->sample_rate)); - pts += insamples->nb_samples - remaining - s->fft_len/2; + pts += insamples->nb_samples - remaining - s->remaining_fill_max; pts = av_rescale_q(pts, av_make_q(1, inlink->sample_rate), outlink->time_base); if (FFABS(pts - out->pts) > PTS_TOLERANCE) { av_log(ctx, AV_LOG_DEBUG, "changing pts from %"PRId64" (%.3f) to %"PRId64" (%.3f).\n", @@ -1518,7 +1543,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) } step = s->step + (s->step_frac.num + s->remaining_frac) / s->step_frac.den; s->remaining_frac = (s->step_frac.num + s->remaining_frac) % s->step_frac.den; - for (m = 0; m < s->fft_len-step; m++) + for (m = 0; m < s->fft_len/2 + s->remaining_fill_max - step; m++) s->fft_data[m] = s->fft_data[m+step]; s->remaining_fill = step; } else { diff --git a/libavfilter/avf_showcqt.h b/libavfilter/avf_showcqt.h index 3fa36f851c..010f85c951 100644 --- a/libavfilter/avf_showcqt.h +++ b/libavfilter/avf_showcqt.h @@ -55,6 +55,7 @@ typedef struct { AVRational step_frac; int remaining_frac; int remaining_fill; + int remaining_fill_max; int64_t next_pts; double *freq; FFTContext *fft_ctx; @@ -62,6 +63,7 @@ typedef struct { FFTComplex *fft_data; FFTComplex *fft_result; FFTComplex *cqt_result; + float *attack_data; int fft_bits; int fft_len; int cqt_len; @@ -104,6 +106,7 @@ typedef struct { float bar_g; float bar_t; double timeclamp; + double attack; double basefreq; double endfreq; float coeffclamp; /* deprecated - ignored */ diff --git a/libavfilter/version.h b/libavfilter/version.h index 6f33acb074..1f9c7996c3 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -31,7 +31,7 @@ #define LIBAVFILTER_VERSION_MAJOR 6 #define LIBAVFILTER_VERSION_MINOR 84 -#define LIBAVFILTER_VERSION_MICRO 100 +#define LIBAVFILTER_VERSION_MICRO 101 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ LIBAVFILTER_VERSION_MINOR, \