Merge commit 'b384e031daeb1ac612620985e3e5377bc587559c'

* commit 'b384e031daeb1ac612620985e3e5377bc587559c':
  lavfi: add volume filter

Conflicts:
	Changelog
	libavfilter/Makefile
	libavfilter/af_volume.c
	libavfilter/version.h

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2012-12-06 15:18:59 +01:00
commit b38c79bf23
6 changed files with 514 additions and 92 deletions

View File

@ -701,96 +701,6 @@ tolerance in @file{silence.mp3}:
ffmpeg -f lavfi -i amovie=silence.mp3,silencedetect=noise=0.0001 -f null -
@end example
@section volume
Adjust the input audio volume.
The filter accepts exactly one parameter @var{vol}, which expresses
how the audio volume will be increased or decreased.
Output values are clipped to the maximum value.
If @var{vol} is expressed as a decimal number, the output audio
volume is given by the relation:
@example
@var{output_volume} = @var{vol} * @var{input_volume}
@end example
If @var{vol} is expressed as a decimal number followed by the string
"dB", the value represents the requested change in decibels of the
input audio power, and the output audio volume is given by the
relation:
@example
@var{output_volume} = 10^(@var{vol}/20) * @var{input_volume}
@end example
Otherwise @var{vol} is considered an expression and its evaluated
value is used for computing the output audio volume according to the
first relation.
Default value for @var{vol} is 1.0.
@subsection Examples
@itemize
@item
Half the input audio volume:
@example
volume=0.5
@end example
The above example is equivalent to:
@example
volume=1/2
@end example
@item
Decrease input audio power by 12 decibels:
@example
volume=-12dB
@end example
@end itemize
@section volumedetect
Detect the volume of the input audio.
The filter has no parameters. The input is not modified. Statistics about
the volume will be printed in the log when the input stream end is reached.
In particular it will show the mean volume (root mean square), maximum
volume (on a per-sample basis), and the beginning of a histogram of the
registered volume values (from the maximum value to a cumulated 1/1000 of
the samples).
All volumes are in decibels relative to the maximum PCM value.
Here is an excerpt of the output:
@example
[Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB
[Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB
[Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6
[Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62
[Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286
[Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042
[Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551
[Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609
[Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409
@end example
It means that:
@itemize
@item
The mean square energy is approximately -27 dB, or 10^-2.7.
@item
The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB.
@item
There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc.
@end itemize
In other words, raising the volume by +4 dB does not cause any clipping,
raising it by +5 dB causes clipping for 6 samples, etc.
@section asyncts
Synchronize audio data with timestamps by squeezing/stretching it and/or
dropping samples/adding silence when needed.
@ -919,6 +829,149 @@ out
Convert the audio sample format, sample rate and channel layout. This filter is
not meant to be used directly.
@section volume
Adjust the input audio volume.
The filter accepts exactly one parameter @var{vol}, which expresses
how the audio volume will be increased or decreased.
Output values are clipped to the maximum value.
If @var{vol} is expressed as a decimal number, the output audio
volume is given by the relation:
@example
@var{output_volume} = @var{vol} * @var{input_volume}
@end example
If @var{vol} is expressed as a decimal number followed by the string
"dB", the value represents the requested change in decibels of the
input audio power, and the output audio volume is given by the
relation:
@example
@var{output_volume} = 10^(@var{vol}/20) * @var{input_volume}
@end example
Otherwise @var{vol} is considered an expression and its evaluated
value is used for computing the output audio volume according to the
first relation.
Default value for @var{vol} is 1.0.
@subsection Examples
@itemize
@item
Half the input audio volume:
@example
volume=0.5
@end example
The above example is equivalent to:
@example
volume=1/2
@end example
@item
Decrease input audio power by 12 decibels:
@example
volume=-12dB
@end example
@end itemize
@section volumedetect
Detect the volume of the input audio.
The filter has no parameters. The input is not modified. Statistics about
the volume will be printed in the log when the input stream end is reached.
In particular it will show the mean volume (root mean square), maximum
volume (on a per-sample basis), and the beginning of a histogram of the
registered volume values (from the maximum value to a cumulated 1/1000 of
the samples).
All volumes are in decibels relative to the maximum PCM value.
Here is an excerpt of the output:
@example
[Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB
[Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB
[Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6
[Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62
[Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286
[Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042
[Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551
[Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609
[Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409
@end example
It means that:
@itemize
@item
The mean square energy is approximately -27 dB, or 10^-2.7.
@item
The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB.
@item
There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc.
@end itemize
In other words, raising the volume by +4 dB does not cause any clipping,
raising it by +5 dB causes clipping for 6 samples, etc.
@section volume_justin
Adjust the input audio volume.
The filter accepts the following named parameters:
@table @option
@item volume
Expresses how the audio volume will be increased or decreased.
Output values are clipped to the maximum value.
The output audio volume is given by the relation:
@example
@var{output_volume} = @var{volume} * @var{input_volume}
@end example
Default value for @var{volume} is 1.0.
@item precision
Mathematical precision.
This determines which input sample formats will be allowed, which affects the
precision of the volume scaling.
@table @option
@item fixed
8-bit fixed-point; limits input sample format to U8, S16, and S32.
@item float
32-bit floating-point; limits input sample format to FLT. (default)
@item double
64-bit floating-point; limits input sample format to DBL.
@end table
@end table
@subsection Examples
@itemize
@item
Halve the input audio volume:
@example
volume_justin=volume=0.5
volume_justin=volume=1/2
volume_justin=volume=-6.0206dB
@end example
@item
Increase input audio power by 6 decibels using fixed-point precision:
@example
volume_justin=volume=6dB:precision=fixed
@end example
@end itemize
@c man end AUDIO FILTERS
@chapter Audio Sources

View File

@ -72,6 +72,7 @@ OBJS-$(CONFIG_PAN_FILTER) += af_pan.o
OBJS-$(CONFIG_RESAMPLE_FILTER) += af_resample.o
OBJS-$(CONFIG_SILENCEDETECT_FILTER) += af_silencedetect.o
OBJS-$(CONFIG_VOLUME_FILTER) += af_volume.o
OBJS-$(CONFIG_VOLUME_JUSTIN_FILTER) += af_volume_justin.o
OBJS-$(CONFIG_VOLUMEDETECT_FILTER) += af_volumedetect.o
OBJS-$(CONFIG_AEVALSRC_FILTER) += asrc_aevalsrc.o

53
libavfilter/af_volume.h Normal file
View File

@ -0,0 +1,53 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* audio volume filter
*/
#ifndef AVFILTER_AF_VOLUME_H
#define AVFILTER_AF_VOLUME_H
#include "libavutil/common.h"
#include "libavutil/float_dsp.h"
#include "libavutil/opt.h"
#include "libavutil/samplefmt.h"
/**
 * Arithmetic used when scaling samples.
 *
 * The numeric values are consecutive and start at 0 because the filter
 * implementation uses them directly as array indices.
 */
enum PrecisionType {
    PRECISION_FIXED  = 0, /* integer (fixed-point) scaling */
    PRECISION_FLOAT  = 1, /* single-precision floating-point scaling */
    PRECISION_DOUBLE = 2, /* double-precision floating-point scaling */
};
/**
 * Private state of the volume filter.
 */
typedef struct VolumeContext {
/* option class; assigned in the filter's init() before options are parsed */
const AVClass *class;
/* float DSP function table; initialized for FLT and DBL sample formats */
AVFloatDSPContext fdsp;
/* arithmetic selected through the "precision" option */
enum PrecisionType precision;
/* linear gain factor from the "volume" option; in fixed precision it is
 * re-derived from volume_i so that it matches the quantized gain */
double volume;
/* gain with 8 fractional bits (volume * 256, rounded); only computed in
 * fixed precision */
int volume_i;
/* number of channels in the input link's channel layout */
int channels;
/* number of data planes: `channels` for planar formats, 1 otherwise */
int planes;
/* sample format of the input link */
enum AVSampleFormat sample_fmt;
/* fixed-point scaling routine chosen for the sample format; it is called
 * with volume_i as its `volume` argument */
void (*scale_samples)(uint8_t *dst, const uint8_t *src, int nb_samples,
int volume);
/* the per-plane sample count is rounded up to a multiple of this value
 * before scaling (1 for integer formats, 4 for FLT, 8 for DBL) */
int samples_align;
} VolumeContext;
#endif /* AVFILTER_AF_VOLUME_H */

View File

@ -0,0 +1,314 @@
/*
* Copyright (c) 2011 Stefano Sabatini
* Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* audio volume filter
*/
#include "libavutil/audioconvert.h"
#include "libavutil/common.h"
#include "libavutil/eval.h"
#include "libavutil/float_dsp.h"
#include "libavutil/opt.h"
#include "audio.h"
#include "avfilter.h"
#include "formats.h"
#include "internal.h"
#include "af_volume.h"
/*
 * Printable names of the precision modes, indexed by enum PrecisionType.
 * Both the strings and the pointer array itself are read-only.
 */
static const char *const precision_str[] = {
    "fixed", "float", "double"
};
/* Byte offset of a VolumeContext member, for the option table below. */
#define OFFSET(x) offsetof(VolumeContext, x)
/* Shorthand for the flag carried by every option of this filter. */
#define A AV_OPT_FLAG_AUDIO_PARAM
/*
 * User-settable options of the filter.
 * "fixed", "float" and "double" are named constants belonging to the
 * "precision" unit, so they can be given as values of the "precision" option.
 */
static const AVOption options[] = {
/* linear gain; defaults to 1.0, allowed range 0 .. 0x7fffff */
{ "volume", "Volume adjustment.",
OFFSET(volume), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0, 0x7fffff, A },
/* arithmetic precision; defaults to PRECISION_FLOAT */
{ "precision", "Mathematical precision.",
OFFSET(precision), AV_OPT_TYPE_INT, { .i64 = PRECISION_FLOAT }, PRECISION_FIXED, PRECISION_DOUBLE, A, "precision" },
{ "fixed", "8-bit fixed-point.", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_FIXED }, INT_MIN, INT_MAX, A, "precision" },
{ "float", "32-bit floating-point.", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_FLOAT }, INT_MIN, INT_MAX, A, "precision" },
{ "double", "64-bit floating-point.", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_DOUBLE }, INT_MIN, INT_MAX, A, "precision" },
/* table terminator */
{ NULL },
};
/* Class descriptor that ties the option table to the filter's private context. */
static const AVClass volume_class = {
    .class_name = "volume filter",
    .option     = options,
    .item_name  = av_default_item_name,
    .version    = LIBAVUTIL_VERSION_INT,
};
/**
 * Parse the filter arguments and prepare the gain values.
 *
 * In fixed precision the gain is quantized to 8 fractional bits
 * (volume_i) and the floating-point gain is replaced by the quantized
 * value, so that the logged gain is the one actually applied.
 *
 * @param ctx  filter context whose private data is a VolumeContext
 * @param args option string in "key=value:key=value" form
 * @return the result of option parsing; negative on error
 */
static av_cold int init(AVFilterContext *ctx, const char *args)
{
    VolumeContext *s = ctx->priv;
    int err;

    s->class = &volume_class;
    av_opt_set_defaults(s);

    err = av_set_options_string(s, args, "=", ":");
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Error parsing options string '%s'.\n", args);
        return err;
    }

    if (s->precision != PRECISION_FIXED) {
        av_log(ctx, AV_LOG_VERBOSE, "volume:(%f)(%1.2fdB) precision:%s\n",
               s->volume, 20.0*log(s->volume)/M_LN10,
               precision_str[s->precision]);
    } else {
        /* round to the nearest multiple of 1/256 */
        s->volume_i = (int)(s->volume * 256 + 0.5);
        s->volume   = s->volume_i / 256.0;
        av_log(ctx, AV_LOG_VERBOSE, "volume:(%d/256)(%f)(%1.2fdB) precision:fixed\n",
               s->volume_i, s->volume, 20.0*log(s->volume)/M_LN10);
    }

    av_opt_free(s);

    return err;
}
/**
 * Declare the formats supported on the filter's links.
 *
 * All channel layouts and sample rates are accepted; the sample formats
 * depend on the configured precision (integer formats for fixed,
 * FLT/FLTP for float, DBL/DBLP for double).
 *
 * @return 0 on success, AVERROR(ENOMEM) if a list cannot be allocated
 */
static int query_formats(AVFilterContext *ctx)
{
    /* one AV_SAMPLE_FMT_NONE-terminated list per precision mode */
    static const enum AVSampleFormat sample_fmts[][7] = {
        [PRECISION_FIXED] = {
            AV_SAMPLE_FMT_U8,
            AV_SAMPLE_FMT_U8P,
            AV_SAMPLE_FMT_S16,
            AV_SAMPLE_FMT_S16P,
            AV_SAMPLE_FMT_S32,
            AV_SAMPLE_FMT_S32P,
            AV_SAMPLE_FMT_NONE
        },
        [PRECISION_FLOAT] = {
            AV_SAMPLE_FMT_FLT,
            AV_SAMPLE_FMT_FLTP,
            AV_SAMPLE_FMT_NONE
        },
        [PRECISION_DOUBLE] = {
            AV_SAMPLE_FMT_DBL,
            AV_SAMPLE_FMT_DBLP,
            AV_SAMPLE_FMT_NONE
        }
    };
    VolumeContext *s = ctx->priv;
    AVFilterChannelLayouts *chlayouts;
    AVFilterFormats *fmt_list;
    AVFilterFormats *rate_list;

    chlayouts = ff_all_channel_layouts();
    if (!chlayouts)
        return AVERROR(ENOMEM);
    ff_set_common_channel_layouts(ctx, chlayouts);

    fmt_list = ff_make_format_list(sample_fmts[s->precision]);
    if (!fmt_list)
        return AVERROR(ENOMEM);
    ff_set_common_formats(ctx, fmt_list);

    rate_list = ff_all_samplerates();
    if (!rate_list)
        return AVERROR(ENOMEM);
    ff_set_common_samplerates(ctx, rate_list);

    return 0;
}
/**
 * Scale unsigned 8-bit samples by a gain with 8 fractional bits,
 * using 64-bit intermediates.
 *
 * Samples are re-centered around 0 (offset 128), multiplied, rounded,
 * shifted back and clipped to the 0..255 range.
 */
static inline void scale_samples_u8(uint8_t *dst, const uint8_t *src,
                                    int nb_samples, int volume)
{
    int n;

    for (n = 0; n < nb_samples; n++) {
        const int64_t centered = (int64_t)src[n] - 128;
        const int64_t scaled   = (centered * volume + 128) >> 8;

        dst[n] = av_clip_uint8(scaled + 128);
    }
}
/**
 * Scale unsigned 8-bit samples by a gain with 8 fractional bits,
 * using plain int intermediates.
 *
 * Same computation as scale_samples_u8() but without widening to 64 bits;
 * volume_init() selects it only for gains below 0x1000000.
 */
static inline void scale_samples_u8_small(uint8_t *dst, const uint8_t *src,
                                          int nb_samples, int volume)
{
    int n;

    for (n = 0; n < nb_samples; n++) {
        const int centered = src[n] - 128;
        const int scaled   = (centered * volume + 128) >> 8;

        dst[n] = av_clip_uint8(scaled + 128);
    }
}
/**
 * Scale signed 16-bit samples by a gain with 8 fractional bits,
 * using 64-bit intermediates.
 *
 * @param dst        output buffer, interpreted as int16_t samples
 * @param src        input buffer, interpreted as int16_t samples
 * @param nb_samples number of samples to process
 * @param volume     gain multiplied by 256
 */
static inline void scale_samples_s16(uint8_t *dst, const uint8_t *src,
                                     int nb_samples, int volume)
{
    int i;
    int16_t *smp_dst       = (int16_t *)dst;
    const int16_t *smp_src = (const int16_t *)src;

    for (i = 0; i < nb_samples; i++) {
        const int64_t scaled = ((int64_t)smp_src[i] * volume + 128) >> 8;

        /* For large gains the scaled value no longer fits in an int.
         * av_clip_int16() takes an int, so passing the 64-bit value directly
         * would truncate it and could flip its sign. Saturate to 32 bits
         * first; the final 16-bit clip then yields the correct extreme. */
        smp_dst[i] = av_clip_int16(av_clipl_int32(scaled));
    }
}
/**
 * Scale signed 16-bit samples by a gain with 8 fractional bits,
 * using plain int intermediates.
 *
 * volume_init() selects it only for gains below 0x10000.
 */
static inline void scale_samples_s16_small(uint8_t *dst, const uint8_t *src,
                                           int nb_samples, int volume)
{
    const int16_t *in = (const int16_t *)src;
    int16_t *out      = (int16_t *)dst;
    int n;

    for (n = 0; n < nb_samples; n++) {
        const int scaled = (in[n] * volume + 128) >> 8;

        out[n] = av_clip_int16(scaled);
    }
}
/**
 * Scale signed 32-bit samples by a gain with 8 fractional bits.
 *
 * The product is computed in 64 bits, rounded, shifted back and
 * clipped to the 32-bit range.
 */
static inline void scale_samples_s32(uint8_t *dst, const uint8_t *src,
                                     int nb_samples, int volume)
{
    const int32_t *in = (const int32_t *)src;
    int32_t *out      = (int32_t *)dst;
    int n;

    for (n = 0; n < nb_samples; n++) {
        const int64_t scaled = ((int64_t)in[n] * volume + 128) >> 8;

        out[n] = av_clipl_int32(scaled);
    }
}
/**
 * Select the scaling routine and sample alignment for the configured
 * sample format.
 *
 * Integer formats get a scale_samples callback; for U8 and S16 the
 * int-only variant is used when the gain is below the threshold checked
 * here. Float and double formats initialize the float DSP context and
 * raise the sample alignment to 4 and 8 respectively.
 */
static void volume_init(VolumeContext *vol)
{
    const enum AVSampleFormat packed_fmt =
        av_get_packed_sample_fmt(vol->sample_fmt);

    vol->samples_align = 1;

    if (packed_fmt == AV_SAMPLE_FMT_U8) {
        vol->scale_samples = vol->volume_i < 0x1000000 ? scale_samples_u8_small
                                                       : scale_samples_u8;
    } else if (packed_fmt == AV_SAMPLE_FMT_S16) {
        vol->scale_samples = vol->volume_i < 0x10000 ? scale_samples_s16_small
                                                     : scale_samples_s16;
    } else if (packed_fmt == AV_SAMPLE_FMT_S32) {
        vol->scale_samples = scale_samples_s32;
    } else if (packed_fmt == AV_SAMPLE_FMT_FLT) {
        avpriv_float_dsp_init(&vol->fdsp, 0);
        vol->samples_align = 4;
    } else if (packed_fmt == AV_SAMPLE_FMT_DBL) {
        avpriv_float_dsp_init(&vol->fdsp, 0);
        vol->samples_align = 8;
    }
}
/**
 * Configure the filter once the input link properties are known.
 *
 * Records the input sample format, channel count and plane count in the
 * private context, then selects the scaling routine.
 *
 * @return always 0
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *filter = outlink->src;
    AVFilterLink *in        = filter->inputs[0];
    VolumeContext *s        = filter->priv;
    const int nb_channels   =
        av_get_channel_layout_nb_channels(in->channel_layout);

    s->sample_fmt = in->format;
    s->channels   = nb_channels;

    /* planar audio keeps one plane per channel, packed audio a single one */
    if (av_sample_fmt_is_planar(in->format))
        s->planes = nb_channels;
    else
        s->planes = 1;

    volume_init(s);

    return 0;
}
/**
 * Apply the configured gain to one buffer of audio and send it downstream.
 *
 * A gain of exactly 1.0 (or 256/256 in fixed precision) passes the buffer
 * through untouched. Otherwise the samples are scaled in place when the
 * input buffer is writable, or into a newly requested buffer when it is not.
 *
 * @param inlink input link the buffer arrived on
 * @param buf    input audio buffer; this function takes ownership of it
 * @return the result of ff_filter_frame(), or AVERROR(ENOMEM) if an output
 *         buffer cannot be allocated
 */
static int filter_frame(AVFilterLink *inlink, AVFilterBufferRef *buf)
{
    VolumeContext *vol    = inlink->dst->priv;
    AVFilterLink *outlink = inlink->dst->outputs[0];
    int nb_samples        = buf->audio->nb_samples;
    AVFilterBufferRef *out_buf;
    int p, plane_samples;

    if (vol->volume == 1.0 || vol->volume_i == 256)
        return ff_filter_frame(outlink, buf);

    /* do volume scaling in-place if input buffer is writable */
    if (buf->perms & AV_PERM_WRITE) {
        out_buf = buf;
    } else {
        out_buf = ff_get_audio_buffer(inlink, AV_PERM_WRITE, nb_samples);
        if (!out_buf) {
            /* the input buffer is owned by this function and is not passed
             * on, so it must be released on the error path */
            avfilter_unref_buffer(buf);
            return AVERROR(ENOMEM);
        }
        out_buf->pts = buf->pts;
    }

    /* number of samples to process per plane, rounded up to samples_align */
    if (av_sample_fmt_is_planar(buf->format))
        plane_samples = FFALIGN(nb_samples, vol->samples_align);
    else
        plane_samples = FFALIGN(nb_samples * vol->channels, vol->samples_align);

    if (vol->precision == PRECISION_FIXED) {
        /* Scaling also runs when volume_i is 0: skipping it would leave an
         * in-place buffer unmodified, i.e. output the input at full volume
         * instead of silence. */
        for (p = 0; p < vol->planes; p++) {
            vol->scale_samples(out_buf->extended_data[p],
                               buf->extended_data[p], plane_samples,
                               vol->volume_i);
        }
    } else if (av_get_packed_sample_fmt(vol->sample_fmt) == AV_SAMPLE_FMT_FLT) {
        for (p = 0; p < vol->planes; p++) {
            vol->fdsp.vector_fmul_scalar((float *)out_buf->extended_data[p],
                                         (const float *)buf->extended_data[p],
                                         vol->volume, plane_samples);
        }
    } else {
        for (p = 0; p < vol->planes; p++) {
            vol->fdsp.vector_dmul_scalar((double *)out_buf->extended_data[p],
                                         (const double *)buf->extended_data[p],
                                         vol->volume, plane_samples);
        }
    }

    /* release the input only when a separate output buffer was used */
    if (buf != out_buf)
        avfilter_unref_buffer(buf);

    return ff_filter_frame(outlink, out_buf);
}
/* Single audio input pad; incoming buffers are handled by filter_frame(). */
static const AVFilterPad avfilter_af_volume_inputs[] = {
    {
        .type         = AVMEDIA_TYPE_AUDIO,
        .name         = "default",
        .filter_frame = filter_frame,
    },
    { NULL } /* terminator */
};
/* Single audio output pad; config_output() runs when the link is configured. */
static const AVFilterPad avfilter_af_volume_outputs[] = {
    {
        .type         = AVMEDIA_TYPE_AUDIO,
        .name         = "default",
        .config_props = config_output,
    },
    { NULL } /* terminator */
};
/* Filter definition registered under the name "volume_justin". */
AVFilter avfilter_af_volume_justin = {
    .name          = "volume_justin",
    .description   = NULL_IF_CONFIG_SMALL("Change input volume."),
    .priv_size     = sizeof(VolumeContext),
    .init          = init,
    .query_formats = query_formats,
    .inputs        = avfilter_af_volume_inputs,
    .outputs       = avfilter_af_volume_outputs,
};

View File

@ -61,10 +61,11 @@ void avfilter_register_all(void)
REGISTER_FILTER (EBUR128, ebur128, af);
REGISTER_FILTER (JOIN, join, af);
REGISTER_FILTER (PAN, pan, af);
REGISTER_FILTER (RESAMPLE, resample, af);
REGISTER_FILTER (SILENCEDETECT, silencedetect, af);
REGISTER_FILTER (VOLUME, volume, af);
REGISTER_FILTER (VOLUME_JUSTIN, volume_justin, af);
REGISTER_FILTER (VOLUMEDETECT,volumedetect,af);
REGISTER_FILTER (RESAMPLE, resample, af);
REGISTER_FILTER (AEVALSRC, aevalsrc, asrc);
REGISTER_FILTER (ANULLSRC, anullsrc, asrc);

View File

@ -29,7 +29,7 @@
#include "libavutil/avutil.h"
#define LIBAVFILTER_VERSION_MAJOR 3
#define LIBAVFILTER_VERSION_MINOR 24
#define LIBAVFILTER_VERSION_MINOR 25
#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \