Mirror of https://git.ffmpeg.org/ffmpeg.git (synced 2024-12-25 16:52:31 +00:00)
avfilter/vf_convolution: add 16-column operation for filter_column()
Based on patch by Xu Jun <xujunzz@sjtu.edu.cn>
This commit is contained in:
parent: 63231fa8d3
commit: c6ce18be08
@ -57,7 +57,7 @@ typedef struct ConvolutionContext {
|
||||
void (*filter[4])(uint8_t *dst, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride);
|
||||
int dstride, int stride, int size);
|
||||
} ConvolutionContext;
|
||||
|
||||
void ff_convolution_init_x86(ConvolutionContext *s);
|
||||
|
@ -110,7 +110,7 @@ typedef struct ThreadData {
|
||||
static void filter16_prewitt(uint8_t *dstp, int width,
|
||||
float scale, float delta, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
uint16_t *dst = (uint16_t *)dstp;
|
||||
int x;
|
||||
@ -128,7 +128,7 @@ static void filter16_prewitt(uint8_t *dstp, int width,
|
||||
static void filter16_roberts(uint8_t *dstp, int width,
|
||||
float scale, float delta, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
uint16_t *dst = (uint16_t *)dstp;
|
||||
int x;
|
||||
@ -144,7 +144,7 @@ static void filter16_roberts(uint8_t *dstp, int width,
|
||||
static void filter16_sobel(uint8_t *dstp, int width,
|
||||
float scale, float delta, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
uint16_t *dst = (uint16_t *)dstp;
|
||||
int x;
|
||||
@ -162,7 +162,7 @@ static void filter16_sobel(uint8_t *dstp, int width,
|
||||
static void filter16_kirsch(uint8_t *dstp, int width,
|
||||
float scale, float delta, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
uint16_t *dst = (uint16_t *)dstp;
|
||||
const uint16_t *c0 = (const uint16_t *)c[0], *c1 = (const uint16_t *)c[1], *c2 = (const uint16_t *)c[2];
|
||||
@ -211,7 +211,7 @@ static void filter16_kirsch(uint8_t *dstp, int width,
|
||||
static void filter_prewitt(uint8_t *dst, int width,
|
||||
float scale, float delta, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
|
||||
const uint8_t *c3 = c[3], *c5 = c[5];
|
||||
@ -231,7 +231,7 @@ static void filter_prewitt(uint8_t *dst, int width,
|
||||
static void filter_roberts(uint8_t *dst, int width,
|
||||
float scale, float delta, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
int x;
|
||||
|
||||
@ -246,7 +246,7 @@ static void filter_roberts(uint8_t *dst, int width,
|
||||
static void filter_sobel(uint8_t *dst, int width,
|
||||
float scale, float delta, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
|
||||
const uint8_t *c3 = c[3], *c5 = c[5];
|
||||
@ -266,7 +266,7 @@ static void filter_sobel(uint8_t *dst, int width,
|
||||
static void filter_kirsch(uint8_t *dst, int width,
|
||||
float scale, float delta, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
|
||||
const uint8_t *c3 = c[3], *c5 = c[5];
|
||||
@ -314,7 +314,7 @@ static void filter_kirsch(uint8_t *dst, int width,
|
||||
static void filter16_3x3(uint8_t *dstp, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
uint16_t *dst = (uint16_t *)dstp;
|
||||
int x;
|
||||
@ -337,7 +337,7 @@ static void filter16_3x3(uint8_t *dstp, int width,
|
||||
static void filter16_5x5(uint8_t *dstp, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
uint16_t *dst = (uint16_t *)dstp;
|
||||
int x;
|
||||
@ -356,7 +356,7 @@ static void filter16_5x5(uint8_t *dstp, int width,
|
||||
static void filter16_7x7(uint8_t *dstp, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
uint16_t *dst = (uint16_t *)dstp;
|
||||
int x;
|
||||
@ -375,7 +375,7 @@ static void filter16_7x7(uint8_t *dstp, int width,
|
||||
static void filter16_row(uint8_t *dstp, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
uint16_t *dst = (uint16_t *)dstp;
|
||||
int x;
|
||||
@ -394,19 +394,24 @@ static void filter16_row(uint8_t *dstp, int width,
|
||||
static void filter16_column(uint8_t *dstp, int height,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
DECLARE_ALIGNED(64, int, sum)[16];
|
||||
uint16_t *dst = (uint16_t *)dstp;
|
||||
int y;
|
||||
const int width = FFMIN(16, size);
|
||||
|
||||
for (y = 0; y < height; y++) {
|
||||
int i, sum = 0;
|
||||
for (int y = 0; y < height; y++) {
|
||||
|
||||
for (i = 0; i < 2 * radius + 1; i++)
|
||||
sum += AV_RN16A(&c[i][0 + y * stride]) * matrix[i];
|
||||
memset(sum, 0, sizeof(sum));
|
||||
for (int i = 0; i < 2 * radius + 1; i++) {
|
||||
for (int off16 = 0; off16 < width; off16++)
|
||||
sum[off16] += AV_RN16A(&c[i][0 + y * stride + off16 * 2]) * matrix[i];
|
||||
}
|
||||
|
||||
sum = (int)(sum * rdiv + bias + 0.5f);
|
||||
dst[0] = av_clip(sum, 0, peak);
|
||||
for (int off16 = 0; off16 < width; off16++) {
|
||||
sum[off16] = (int)(sum[off16] * rdiv + bias + 0.5f);
|
||||
dst[off16] = av_clip(sum[off16], 0, peak);
|
||||
}
|
||||
dst += dstride / 2;
|
||||
}
|
||||
}
|
||||
@ -414,7 +419,7 @@ static void filter16_column(uint8_t *dstp, int height,
|
||||
static void filter_7x7(uint8_t *dst, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
int x;
|
||||
|
||||
@ -432,7 +437,7 @@ static void filter_7x7(uint8_t *dst, int width,
|
||||
static void filter_5x5(uint8_t *dst, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
int x;
|
||||
|
||||
@ -450,7 +455,7 @@ static void filter_5x5(uint8_t *dst, int width,
|
||||
static void filter_3x3(uint8_t *dst, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
|
||||
const uint8_t *c3 = c[3], *c4 = c[4], *c5 = c[5];
|
||||
@ -469,7 +474,7 @@ static void filter_3x3(uint8_t *dst, int width,
|
||||
static void filter_row(uint8_t *dst, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
int x;
|
||||
|
||||
@ -487,18 +492,22 @@ static void filter_row(uint8_t *dst, int width,
|
||||
static void filter_column(uint8_t *dst, int height,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride)
|
||||
int dstride, int stride, int size)
|
||||
{
|
||||
int y;
|
||||
DECLARE_ALIGNED(64, int, sum)[16];
|
||||
|
||||
for (y = 0; y < height; y++) {
|
||||
int i, sum = 0;
|
||||
for (int y = 0; y < height; y++) {
|
||||
memset(sum, 0, sizeof(sum));
|
||||
|
||||
for (i = 0; i < 2 * radius + 1; i++)
|
||||
sum += c[i][0 + y * stride] * matrix[i];
|
||||
for (int i = 0; i < 2 * radius + 1; i++) {
|
||||
for (int off16 = 0; off16 < 16; off16++)
|
||||
sum[off16] += c[i][0 + y * stride + off16] * matrix[i];
|
||||
}
|
||||
|
||||
sum = (int)(sum * rdiv + bias + 0.5f);
|
||||
dst[0] = av_clip_uint8(sum);
|
||||
for (int off16 = 0; off16 < 16; off16++) {
|
||||
sum[off16] = (int)(sum[off16] * rdiv + bias + 0.5f);
|
||||
dst[off16] = av_clip_uint8(sum[off16]);
|
||||
}
|
||||
dst += dstride;
|
||||
}
|
||||
}
|
||||
@ -605,6 +614,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
|
||||
const int dst_pos = slice_start * (mode == MATRIX_COLUMN ? bpc : dstride);
|
||||
uint8_t *dst = out->data[plane] + dst_pos;
|
||||
const int *matrix = s->matrix[plane];
|
||||
const int step = mode == MATRIX_COLUMN ? 16 : 1;
|
||||
const uint8_t *c[49];
|
||||
int y, x;
|
||||
|
||||
@ -617,8 +627,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
|
||||
width * bpc, slice_end - slice_start);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (y = slice_start; y < slice_end; y++) {
|
||||
for (y = slice_start; y < slice_end; y += step) {
|
||||
const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : radius * bpc;
|
||||
const int yoff = mode == MATRIX_COLUMN ? radius * dstride : 0;
|
||||
|
||||
@ -629,12 +638,12 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
|
||||
s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
|
||||
s->filter[plane](dst + yoff + xoff, 1, rdiv,
|
||||
bias, matrix, c, s->max, radius,
|
||||
dstride, stride);
|
||||
dstride, stride, slice_end - step);
|
||||
}
|
||||
s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc);
|
||||
s->filter[plane](dst + yoff + xoff, sizew - 2 * radius,
|
||||
rdiv, bias, matrix, c, s->max, radius,
|
||||
dstride, stride);
|
||||
dstride, stride, slice_end - step);
|
||||
for (x = sizew - radius; x < sizew; x++) {
|
||||
const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : x * bpc;
|
||||
const int yoff = mode == MATRIX_COLUMN ? x * dstride : 0;
|
||||
@ -642,7 +651,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
|
||||
s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
|
||||
s->filter[plane](dst + yoff + xoff, 1, rdiv,
|
||||
bias, matrix, c, s->max, radius,
|
||||
dstride, stride);
|
||||
dstride, stride, slice_end - step);
|
||||
}
|
||||
if (mode != MATRIX_COLUMN)
|
||||
dst += dstride;
|
||||
|
@ -27,7 +27,7 @@
|
||||
void ff_filter_3x3_sse4(uint8_t *dst, int width,
|
||||
float rdiv, float bias, const int *const matrix,
|
||||
const uint8_t *c[], int peak, int radius,
|
||||
int dstride, int stride);
|
||||
int dstride, int stride, int size);
|
||||
|
||||
av_cold void ff_convolution_init_x86(ConvolutionContext *s)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user