diff --git a/doc/filters.texi b/doc/filters.texi index f74551725a..bce20f09fa 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -701,96 +701,6 @@ tolerance in @file{silence.mp3}: ffmpeg -f lavfi -i amovie=silence.mp3,silencedetect=noise=0.0001 -f null - @end example -@section volume - -Adjust the input audio volume. - -The filter accepts exactly one parameter @var{vol}, which expresses -how the audio volume will be increased or decreased. - -Output values are clipped to the maximum value. - -If @var{vol} is expressed as a decimal number, the output audio -volume is given by the relation: -@example -@var{output_volume} = @var{vol} * @var{input_volume} -@end example - -If @var{vol} is expressed as a decimal number followed by the string -"dB", the value represents the requested change in decibels of the -input audio power, and the output audio volume is given by the -relation: -@example -@var{output_volume} = 10^(@var{vol}/20) * @var{input_volume} -@end example - -Otherwise @var{vol} is considered an expression and its evaluated -value is used for computing the output audio volume according to the -first relation. - -Default value for @var{vol} is 1.0. - -@subsection Examples - -@itemize -@item -Half the input audio volume: -@example -volume=0.5 -@end example - -The above example is equivalent to: -@example -volume=1/2 -@end example - -@item -Decrease input audio power by 12 decibels: -@example -volume=-12dB -@end example -@end itemize - -@section volumedetect - -Detect the volume of the input video. - -The filter has no parameters. The input is not modified. Statistics about -the volume will be printed in the log when the input stream end is reached. - -In particular it will show the mean volume (root mean square), maximum -volume (on a per-sample basis), and the beginning of an histogram of the -registered volume values (from the maximum value to a cumulated 1/1000 of -the samples). - -All volumes are in decibels relative to the maximum PCM value. 
- -Here is an excerpt of the output: -@example -[Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB -[Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB -[Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6 -[Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62 -[Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286 -[Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042 -[Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551 -[Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609 -[Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409 -@end example - -It means that: -@itemize -@item -The mean square energy is approximately -27 dB, or 10^-2.7. -@item -The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB. -@item -There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc. -@end itemize - -In other words, raising the volume by +4 dB does not cause any clipping, -raising it by +5 dB causes clipping for 6 samples, etc. - @section asyncts Synchronize audio data with timestamps by squeezing/stretching it and/or dropping samples/adding silence when needed. @@ -919,6 +829,149 @@ out Convert the audio sample format, sample rate and channel layout. This filter is not meant to be used directly. +@section volume + +Adjust the input audio volume. + +The filter accepts exactly one parameter @var{vol}, which expresses +how the audio volume will be increased or decreased. + +Output values are clipped to the maximum value. 
+ +If @var{vol} is expressed as a decimal number, the output audio +volume is given by the relation: +@example +@var{output_volume} = @var{vol} * @var{input_volume} +@end example + +If @var{vol} is expressed as a decimal number followed by the string +"dB", the value represents the requested change in decibels of the +input audio power, and the output audio volume is given by the +relation: +@example +@var{output_volume} = 10^(@var{vol}/20) * @var{input_volume} +@end example + +Otherwise @var{vol} is considered an expression and its evaluated +value is used for computing the output audio volume according to the +first relation. + +Default value for @var{vol} is 1.0. + +@subsection Examples + +@itemize +@item +Halve the input audio volume: +@example +volume=0.5 +@end example + +The above example is equivalent to: +@example +volume=1/2 +@end example + +@item +Decrease input audio power by 12 decibels: +@example +volume=-12dB +@end example +@end itemize + +@section volumedetect + +Detect the volume of the input audio. + +The filter has no parameters. The input is not modified. Statistics about +the volume will be printed in the log when the end of the input stream is reached. + +In particular it will show the mean volume (root mean square), maximum +volume (on a per-sample basis), and the beginning of a histogram of the +registered volume values (from the maximum value to a cumulative 1/1000 of +the samples). + +All volumes are in decibels relative to the maximum PCM value. 
+ +Here is an excerpt of the output: +@example +[Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB +[Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB +[Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6 +[Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62 +[Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286 +[Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042 +[Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551 +[Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609 +[Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409 +@end example + +It means that: +@itemize +@item +The mean square energy is approximately -27 dB, or 10^-2.7. +@item +The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB. +@item +There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc. +@end itemize + +In other words, raising the volume by +4 dB does not cause any clipping, +raising it by +5 dB causes clipping for 6 samples, etc. + +@section volume_justin + +Adjust the input audio volume. + +The filter accepts the following named parameters: +@table @option + +@item volume +Expresses how the audio volume will be increased or decreased. + +Output values are clipped to the maximum value. + +The output audio volume is given by the relation: +@example +@var{output_volume} = @var{volume} * @var{input_volume} +@end example + +Default value for @var{volume} is 1.0. + +@item precision +Mathematical precision. + +This determines which input sample formats will be allowed, which affects the +precision of the volume scaling. + +@table @option +@item fixed +8-bit fixed-point; limits input sample format to U8, S16, and S32. +@item float +32-bit floating-point; limits input sample format to FLT. (default) +@item double +64-bit floating-point; limits input sample format to DBL. 
+@end table +@end table + +@subsection Examples + +@itemize +@item +Halve the input audio volume: +@example +volume_justin=volume=0.5 +volume_justin=volume=1/2 +volume_justin=volume=-6.0206dB +@end example + +@item +Increase input audio power by 6 decibels using fixed-point precision: +@example +volume_justin=volume=6dB:precision=fixed +@end example +@end itemize + @c man end AUDIO FILTERS @chapter Audio Sources diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 377bd4d701..b8d546e35d 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -72,6 +72,7 @@ OBJS-$(CONFIG_PAN_FILTER) += af_pan.o OBJS-$(CONFIG_RESAMPLE_FILTER) += af_resample.o OBJS-$(CONFIG_SILENCEDETECT_FILTER) += af_silencedetect.o OBJS-$(CONFIG_VOLUME_FILTER) += af_volume.o +OBJS-$(CONFIG_VOLUME_JUSTIN_FILTER) += af_volume_justin.o OBJS-$(CONFIG_VOLUMEDETECT_FILTER) += af_volumedetect.o OBJS-$(CONFIG_AEVALSRC_FILTER) += asrc_aevalsrc.o diff --git a/libavfilter/af_volume.h b/libavfilter/af_volume.h new file mode 100644 index 0000000000..1007618c0a --- /dev/null +++ b/libavfilter/af_volume.h @@ -0,0 +1,53 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * audio volume filter + */ + +#ifndef AVFILTER_AF_VOLUME_H +#define AVFILTER_AF_VOLUME_H + +#include "libavutil/common.h" +#include "libavutil/float_dsp.h" +#include "libavutil/opt.h" +#include "libavutil/samplefmt.h" + +enum PrecisionType { + PRECISION_FIXED = 0, + PRECISION_FLOAT, + PRECISION_DOUBLE, +}; + +typedef struct VolumeContext { + const AVClass *class; + AVFloatDSPContext fdsp; + enum PrecisionType precision; + double volume; + int volume_i; + int channels; + int planes; + enum AVSampleFormat sample_fmt; + + void (*scale_samples)(uint8_t *dst, const uint8_t *src, int nb_samples, + int volume); + int samples_align; +} VolumeContext; + +#endif /* AVFILTER_AF_VOLUME_H */ diff --git a/libavfilter/af_volume_justin.c b/libavfilter/af_volume_justin.c new file mode 100644 index 0000000000..1d7c219298 --- /dev/null +++ b/libavfilter/af_volume_justin.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2011 Stefano Sabatini + * Copyright (c) 2012 Justin Ruggles + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * audio volume filter + */ + +#include "libavutil/audioconvert.h" +#include "libavutil/common.h" +#include "libavutil/eval.h" +#include "libavutil/float_dsp.h" +#include "libavutil/opt.h" +#include "audio.h" +#include "avfilter.h" +#include "formats.h" +#include "internal.h" +#include "af_volume.h" + +static const char *precision_str[] = { + "fixed", "float", "double" +}; + +#define OFFSET(x) offsetof(VolumeContext, x) +#define A AV_OPT_FLAG_AUDIO_PARAM + +static const AVOption options[] = { + { "volume", "Volume adjustment.", + OFFSET(volume), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0, 0x7fffff, A }, + { "precision", "Mathematical precision.", + OFFSET(precision), AV_OPT_TYPE_INT, { .i64 = PRECISION_FLOAT }, PRECISION_FIXED, PRECISION_DOUBLE, A, "precision" }, + { "fixed", "8-bit fixed-point.", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_FIXED }, INT_MIN, INT_MAX, A, "precision" }, + { "float", "32-bit floating-point.", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_FLOAT }, INT_MIN, INT_MAX, A, "precision" }, + { "double", "64-bit floating-point.", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_DOUBLE }, INT_MIN, INT_MAX, A, "precision" }, + { NULL }, +}; + +static const AVClass volume_class = { + .class_name = "volume filter", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + +static av_cold int init(AVFilterContext *ctx, const char *args) +{ + VolumeContext *vol = ctx->priv; + int ret; + + vol->class = &volume_class; + av_opt_set_defaults(vol); + + if ((ret = av_set_options_string(vol, args, "=", ":")) < 0) { + av_log(ctx, AV_LOG_ERROR, "Error parsing options string '%s'.\n", args); + return ret; + } + + if (vol->precision == PRECISION_FIXED) { + vol->volume_i = (int)(vol->volume * 256 + 
0.5); + vol->volume = vol->volume_i / 256.0; + av_log(ctx, AV_LOG_VERBOSE, "volume:(%d/256)(%f)(%1.2fdB) precision:fixed\n", + vol->volume_i, vol->volume, 20.0*log(vol->volume)/M_LN10); + } else { + av_log(ctx, AV_LOG_VERBOSE, "volume:(%f)(%1.2fdB) precision:%s\n", + vol->volume, 20.0*log(vol->volume)/M_LN10, + precision_str[vol->precision]); + } + + av_opt_free(vol); + return ret; +} + +static int query_formats(AVFilterContext *ctx) +{ + VolumeContext *vol = ctx->priv; + AVFilterFormats *formats = NULL; + AVFilterChannelLayouts *layouts; + static const enum AVSampleFormat sample_fmts[][7] = { + /* PRECISION_FIXED */ + { + AV_SAMPLE_FMT_U8, + AV_SAMPLE_FMT_U8P, + AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_S32, + AV_SAMPLE_FMT_S32P, + AV_SAMPLE_FMT_NONE + }, + /* PRECISION_FLOAT */ + { + AV_SAMPLE_FMT_FLT, + AV_SAMPLE_FMT_FLTP, + AV_SAMPLE_FMT_NONE + }, + /* PRECISION_DOUBLE */ + { + AV_SAMPLE_FMT_DBL, + AV_SAMPLE_FMT_DBLP, + AV_SAMPLE_FMT_NONE + } + }; + + layouts = ff_all_channel_layouts(); + if (!layouts) + return AVERROR(ENOMEM); + ff_set_common_channel_layouts(ctx, layouts); + + formats = ff_make_format_list(sample_fmts[vol->precision]); + if (!formats) + return AVERROR(ENOMEM); + ff_set_common_formats(ctx, formats); + + formats = ff_all_samplerates(); + if (!formats) + return AVERROR(ENOMEM); + ff_set_common_samplerates(ctx, formats); + + return 0; +} + +static inline void scale_samples_u8(uint8_t *dst, const uint8_t *src, + int nb_samples, int volume) +{ + int i; + for (i = 0; i < nb_samples; i++) + dst[i] = av_clip_uint8(((((int64_t)src[i] - 128) * volume + 128) >> 8) + 128); +} + +static inline void scale_samples_u8_small(uint8_t *dst, const uint8_t *src, + int nb_samples, int volume) +{ + int i; + for (i = 0; i < nb_samples; i++) + dst[i] = av_clip_uint8((((src[i] - 128) * volume + 128) >> 8) + 128); +} + +static inline void scale_samples_s16(uint8_t *dst, const uint8_t *src, + int nb_samples, int volume) +{ + int i; + int16_t *smp_dst = 
(int16_t *)dst; + const int16_t *smp_src = (const int16_t *)src; + for (i = 0; i < nb_samples; i++) + smp_dst[i] = av_clip_int16(((int64_t)smp_src[i] * volume + 128) >> 8); +} + +static inline void scale_samples_s16_small(uint8_t *dst, const uint8_t *src, + int nb_samples, int volume) +{ + int i; + int16_t *smp_dst = (int16_t *)dst; + const int16_t *smp_src = (const int16_t *)src; + for (i = 0; i < nb_samples; i++) + smp_dst[i] = av_clip_int16((smp_src[i] * volume + 128) >> 8); +} + +static inline void scale_samples_s32(uint8_t *dst, const uint8_t *src, + int nb_samples, int volume) +{ + int i; + int32_t *smp_dst = (int32_t *)dst; + const int32_t *smp_src = (const int32_t *)src; + for (i = 0; i < nb_samples; i++) + smp_dst[i] = av_clipl_int32((((int64_t)smp_src[i] * volume + 128) >> 8)); +} + + + +static void volume_init(VolumeContext *vol) +{ + vol->samples_align = 1; + + switch (av_get_packed_sample_fmt(vol->sample_fmt)) { + case AV_SAMPLE_FMT_U8: + if (vol->volume_i < 0x1000000) + vol->scale_samples = scale_samples_u8_small; + else + vol->scale_samples = scale_samples_u8; + break; + case AV_SAMPLE_FMT_S16: + if (vol->volume_i < 0x10000) + vol->scale_samples = scale_samples_s16_small; + else + vol->scale_samples = scale_samples_s16; + break; + case AV_SAMPLE_FMT_S32: + vol->scale_samples = scale_samples_s32; + break; + case AV_SAMPLE_FMT_FLT: + avpriv_float_dsp_init(&vol->fdsp, 0); + vol->samples_align = 4; + break; + case AV_SAMPLE_FMT_DBL: + avpriv_float_dsp_init(&vol->fdsp, 0); + vol->samples_align = 8; + break; + } +} + +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + VolumeContext *vol = ctx->priv; + AVFilterLink *inlink = ctx->inputs[0]; + + vol->sample_fmt = inlink->format; + vol->channels = av_get_channel_layout_nb_channels(inlink->channel_layout); + vol->planes = av_sample_fmt_is_planar(inlink->format) ? 
vol->channels : 1; + + volume_init(vol); + + return 0; +} + +static int filter_frame(AVFilterLink *inlink, AVFilterBufferRef *buf) +{ + VolumeContext *vol = inlink->dst->priv; + AVFilterLink *outlink = inlink->dst->outputs[0]; + int nb_samples = buf->audio->nb_samples; + AVFilterBufferRef *out_buf; + + if (vol->volume == 1.0 || vol->volume_i == 256) + return ff_filter_frame(outlink, buf); + + /* do volume scaling in-place if input buffer is writable */ + if (buf->perms & AV_PERM_WRITE) { + out_buf = buf; + } else { + out_buf = ff_get_audio_buffer(inlink, AV_PERM_WRITE, nb_samples); + if (!out_buf) + return AVERROR(ENOMEM); + out_buf->pts = buf->pts; + } + + if (vol->precision != PRECISION_FIXED || vol->volume_i > 0) { + int p, plane_samples; + + if (av_sample_fmt_is_planar(buf->format)) + plane_samples = FFALIGN(nb_samples, vol->samples_align); + else + plane_samples = FFALIGN(nb_samples * vol->channels, vol->samples_align); + + if (vol->precision == PRECISION_FIXED) { + for (p = 0; p < vol->planes; p++) { + vol->scale_samples(out_buf->extended_data[p], + buf->extended_data[p], plane_samples, + vol->volume_i); + } + } else if (av_get_packed_sample_fmt(vol->sample_fmt) == AV_SAMPLE_FMT_FLT) { + for (p = 0; p < vol->planes; p++) { + vol->fdsp.vector_fmul_scalar((float *)out_buf->extended_data[p], + (const float *)buf->extended_data[p], + vol->volume, plane_samples); + } + } else { + for (p = 0; p < vol->planes; p++) { + vol->fdsp.vector_dmul_scalar((double *)out_buf->extended_data[p], + (const double *)buf->extended_data[p], + vol->volume, plane_samples); + } + } + } + + if (buf != out_buf) + avfilter_unref_buffer(buf); + + return ff_filter_frame(outlink, out_buf); +} + +static const AVFilterPad avfilter_af_volume_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_AUDIO, + .filter_frame = filter_frame, + }, + { NULL } +}; + +static const AVFilterPad avfilter_af_volume_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_AUDIO, + .config_props = 
config_output, + }, + { NULL } +}; + +AVFilter avfilter_af_volume_justin = { + .name = "volume_justin", + .description = NULL_IF_CONFIG_SMALL("Change input volume."), + .query_formats = query_formats, + .priv_size = sizeof(VolumeContext), + .init = init, + .inputs = avfilter_af_volume_inputs, + .outputs = avfilter_af_volume_outputs, +}; diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 5a1f939781..f9cacfc9bf 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -61,10 +61,11 @@ void avfilter_register_all(void) REGISTER_FILTER (EBUR128, ebur128, af); REGISTER_FILTER (JOIN, join, af); REGISTER_FILTER (PAN, pan, af); + REGISTER_FILTER (RESAMPLE, resample, af); REGISTER_FILTER (SILENCEDETECT, silencedetect, af); REGISTER_FILTER (VOLUME, volume, af); + REGISTER_FILTER (VOLUME_JUSTIN, volume_justin, af); REGISTER_FILTER (VOLUMEDETECT,volumedetect,af); - REGISTER_FILTER (RESAMPLE, resample, af); REGISTER_FILTER (AEVALSRC, aevalsrc, asrc); REGISTER_FILTER (ANULLSRC, anullsrc, asrc); diff --git a/libavfilter/version.h b/libavfilter/version.h index 694f4f0ab4..bf5f4969f0 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -29,7 +29,7 @@ #include "libavutil/avutil.h" #define LIBAVFILTER_VERSION_MAJOR 3 -#define LIBAVFILTER_VERSION_MINOR 24 +#define LIBAVFILTER_VERSION_MINOR 25 #define LIBAVFILTER_VERSION_MICRO 100 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \