mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-02-21 06:16:59 +00:00
avfilter/vf_v360: x86 SIMD for interpolations
This commit is contained in:
parent
f0d8005ec5
commit
058bbf48c6
113
libavfilter/v360.h
Normal file
113
libavfilter/v360.h
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019 Eugene Lyapustin
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVFILTER_V360_H
|
||||||
|
#define AVFILTER_V360_H
|
||||||
|
#include "avfilter.h"
|
||||||
|
|
||||||
|
/**
 * Projection format identifiers.
 *
 * V360Context.in is switched on against these values in config_output();
 * NOTE(review): V360Context.out presumably uses the same set — confirm.
 * NB_PROJECTIONS is the last enumerator and therefore equals the number of
 * projection values declared before it.
 */
enum Projections {
    EQUIRECTANGULAR,
    CUBEMAP_3_2,
    CUBEMAP_6_1,
    EQUIANGULAR,
    FLAT,
    DUAL_FISHEYE,
    BARREL,
    CUBEMAP_1_6,
    NB_PROJECTIONS,
};
|
||||||
|
|
||||||
|
/**
 * Interpolation method identifiers.
 *
 * ff_v360_init() switches on V360Context.interp against these values to
 * pick the line-remapping routine: NEAREST selects the remap1 routines,
 * BILINEAR the remap2 routines, and BICUBIC and LANCZOS both select the
 * remap4 routines.
 * NB_INTERP_METHODS is the last enumerator and equals the number of methods.
 */
enum InterpMethod {
    NEAREST,
    BILINEAR,
    BICUBIC,
    LANCZOS,
    NB_INTERP_METHODS,
};
|
||||||
|
|
||||||
|
/**
 * Cubemap face slot identifiers.
 *
 * NOTE(review): the names suggest positions in a 3x2 face grid (row, then
 * column) — confirm against the cubemap layout code.
 * NB_FACES is the last enumerator and equals the number of face slots.
 */
enum Faces {
    TOP_LEFT,
    TOP_MIDDLE,
    TOP_RIGHT,
    BOTTOM_LEFT,
    BOTTOM_MIDDLE,
    BOTTOM_RIGHT,
    NB_FACES,
};
|
||||||
|
|
||||||
|
/**
 * Axis-aligned direction identifiers, one per signed coordinate axis.
 *
 * Note that FRONT is the negative Z axis and BACK the positive one.
 * NB_DIRECTIONS is the last enumerator and equals the number of directions.
 */
enum Direction {
    RIGHT,          ///< Axis +X
    LEFT,           ///< Axis -X
    UP,             ///< Axis +Y
    DOWN,           ///< Axis -Y
    FRONT,          ///< Axis -Z
    BACK,           ///< Axis +Z
    NB_DIRECTIONS,
};
|
||||||
|
|
||||||
|
/**
 * Rotation identifiers.
 *
 * NOTE(review): the suffixes presumably give the angle in degrees, in
 * quarter-turn steps; the turn direction is not visible here — confirm.
 * NB_ROTATIONS is the last enumerator and equals the number of rotations.
 */
enum Rotation {
    ROT_0,
    ROT_90,
    ROT_180,
    ROT_270,
    NB_ROTATIONS,
};
|
||||||
|
|
||||||
|
typedef struct V360Context {
|
||||||
|
const AVClass *class;
|
||||||
|
int in, out;
|
||||||
|
int interp;
|
||||||
|
int width, height;
|
||||||
|
char* in_forder;
|
||||||
|
char* out_forder;
|
||||||
|
char* in_frot;
|
||||||
|
char* out_frot;
|
||||||
|
|
||||||
|
int in_cubemap_face_order[6];
|
||||||
|
int out_cubemap_direction_order[6];
|
||||||
|
int in_cubemap_face_rotation[6];
|
||||||
|
int out_cubemap_face_rotation[6];
|
||||||
|
|
||||||
|
float in_pad, out_pad;
|
||||||
|
|
||||||
|
float yaw, pitch, roll;
|
||||||
|
|
||||||
|
int h_flip, v_flip, d_flip;
|
||||||
|
|
||||||
|
float h_fov, v_fov;
|
||||||
|
float flat_range[3];
|
||||||
|
|
||||||
|
int planewidth[4], planeheight[4];
|
||||||
|
int inplanewidth[4], inplaneheight[4];
|
||||||
|
int nb_planes;
|
||||||
|
|
||||||
|
uint16_t *u[4], *v[4];
|
||||||
|
int16_t *ker[4];
|
||||||
|
|
||||||
|
int (*remap_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
|
||||||
|
|
||||||
|
void (*remap_line)(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
|
||||||
|
const uint16_t *u, const uint16_t *v, const int16_t *ker);
|
||||||
|
} V360Context;
|
||||||
|
|
||||||
|
void ff_v360_init(V360Context *s, int depth);
|
||||||
|
void ff_v360_init_x86(V360Context *s, int depth);
|
||||||
|
|
||||||
|
#endif /* AVFILTER_V360_H */
|
@ -41,88 +41,7 @@
|
|||||||
#include "formats.h"
|
#include "formats.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "video.h"
|
#include "video.h"
|
||||||
|
#include "v360.h"
|
||||||
enum Projections {
|
|
||||||
EQUIRECTANGULAR,
|
|
||||||
CUBEMAP_3_2,
|
|
||||||
CUBEMAP_6_1,
|
|
||||||
EQUIANGULAR,
|
|
||||||
FLAT,
|
|
||||||
DUAL_FISHEYE,
|
|
||||||
BARREL,
|
|
||||||
CUBEMAP_1_6,
|
|
||||||
NB_PROJECTIONS,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum InterpMethod {
|
|
||||||
NEAREST,
|
|
||||||
BILINEAR,
|
|
||||||
BICUBIC,
|
|
||||||
LANCZOS,
|
|
||||||
NB_INTERP_METHODS,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum Faces {
|
|
||||||
TOP_LEFT,
|
|
||||||
TOP_MIDDLE,
|
|
||||||
TOP_RIGHT,
|
|
||||||
BOTTOM_LEFT,
|
|
||||||
BOTTOM_MIDDLE,
|
|
||||||
BOTTOM_RIGHT,
|
|
||||||
NB_FACES,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum Direction {
|
|
||||||
RIGHT, ///< Axis +X
|
|
||||||
LEFT, ///< Axis -X
|
|
||||||
UP, ///< Axis +Y
|
|
||||||
DOWN, ///< Axis -Y
|
|
||||||
FRONT, ///< Axis -Z
|
|
||||||
BACK, ///< Axis +Z
|
|
||||||
NB_DIRECTIONS,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum Rotation {
|
|
||||||
ROT_0,
|
|
||||||
ROT_90,
|
|
||||||
ROT_180,
|
|
||||||
ROT_270,
|
|
||||||
NB_ROTATIONS,
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct V360Context {
|
|
||||||
const AVClass *class;
|
|
||||||
int in, out;
|
|
||||||
int interp;
|
|
||||||
int width, height;
|
|
||||||
char* in_forder;
|
|
||||||
char* out_forder;
|
|
||||||
char* in_frot;
|
|
||||||
char* out_frot;
|
|
||||||
|
|
||||||
int in_cubemap_face_order[6];
|
|
||||||
int out_cubemap_direction_order[6];
|
|
||||||
int in_cubemap_face_rotation[6];
|
|
||||||
int out_cubemap_face_rotation[6];
|
|
||||||
|
|
||||||
float in_pad, out_pad;
|
|
||||||
|
|
||||||
float yaw, pitch, roll;
|
|
||||||
|
|
||||||
int h_flip, v_flip, d_flip;
|
|
||||||
|
|
||||||
float h_fov, v_fov;
|
|
||||||
float flat_range[3];
|
|
||||||
|
|
||||||
int planewidth[4], planeheight[4];
|
|
||||||
int inplanewidth[4], inplaneheight[4];
|
|
||||||
int nb_planes;
|
|
||||||
|
|
||||||
uint16_t *u[4], *v[4];
|
|
||||||
int16_t *ker[4];
|
|
||||||
|
|
||||||
int (*remap_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
|
|
||||||
} V360Context;
|
|
||||||
|
|
||||||
typedef struct ThreadData {
|
typedef struct ThreadData {
|
||||||
AVFrame *in;
|
AVFrame *in;
|
||||||
@ -251,47 +170,22 @@ static int query_formats(AVFilterContext *ctx)
|
|||||||
return ff_set_common_formats(ctx, fmts_list);
|
return ff_set_common_formats(ctx, fmts_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
#define DEFINE_REMAP1_LINE(bits, div) \
|
||||||
* Generate no-interpolation remapping function with a given pixel depth.
|
static void remap1_##bits##bit_line_c(uint8_t *dst, int width, const uint8_t *src, \
|
||||||
*
|
ptrdiff_t in_linesize, \
|
||||||
* @param bits number of bits per pixel
|
const uint16_t *u, const uint16_t *v, const int16_t *ker) \
|
||||||
* @param div number of bytes per pixel
|
{ \
|
||||||
*/
|
const uint##bits##_t *s = (const uint##bits##_t *)src; \
|
||||||
#define DEFINE_REMAP1(bits, div) \
|
uint##bits##_t *d = (uint##bits##_t *)dst; \
|
||||||
static int remap1_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
|
\
|
||||||
{ \
|
in_linesize /= div; \
|
||||||
ThreadData *td = (ThreadData*)arg; \
|
\
|
||||||
const V360Context *s = ctx->priv; \
|
for (int x = 0; x < width; x++) \
|
||||||
const AVFrame *in = td->in; \
|
d[x] = s[v[x] * in_linesize + u[x]]; \
|
||||||
AVFrame *out = td->out; \
|
|
||||||
\
|
|
||||||
int plane, x, y; \
|
|
||||||
\
|
|
||||||
for (plane = 0; plane < s->nb_planes; plane++) { \
|
|
||||||
const int in_linesize = in->linesize[plane] / div; \
|
|
||||||
const int out_linesize = out->linesize[plane] / div; \
|
|
||||||
const uint##bits##_t *src = (const uint##bits##_t *)in->data[plane]; \
|
|
||||||
uint##bits##_t *dst = (uint##bits##_t *)out->data[plane]; \
|
|
||||||
const int width = s->planewidth[plane]; \
|
|
||||||
const int height = s->planeheight[plane]; \
|
|
||||||
\
|
|
||||||
const int slice_start = (height * jobnr ) / nb_jobs; \
|
|
||||||
const int slice_end = (height * (jobnr + 1)) / nb_jobs; \
|
|
||||||
\
|
|
||||||
for (y = slice_start; y < slice_end; y++) { \
|
|
||||||
const uint16_t *u = s->u[plane] + y * width; \
|
|
||||||
const uint16_t *v = s->v[plane] + y * width; \
|
|
||||||
uint##bits##_t *d = dst + y * out_linesize; \
|
|
||||||
for (x = 0; x < width; x++) \
|
|
||||||
*d++ = src[v[x] * in_linesize + u[x]]; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
return 0; \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_REMAP1( 8, 1)
|
DEFINE_REMAP1_LINE( 8, 1)
|
||||||
DEFINE_REMAP1(16, 2)
|
DEFINE_REMAP1_LINE(16, 2)
|
||||||
|
|
||||||
typedef struct XYRemap {
|
typedef struct XYRemap {
|
||||||
uint16_t u[4][4];
|
uint16_t u[4][4];
|
||||||
@ -304,9 +198,8 @@ typedef struct XYRemap {
|
|||||||
*
|
*
|
||||||
* @param ws size of interpolation window
|
* @param ws size of interpolation window
|
||||||
* @param bits number of bits per pixel
|
* @param bits number of bits per pixel
|
||||||
* @param div number of bytes per pixel
|
|
||||||
*/
|
*/
|
||||||
#define DEFINE_REMAP(ws, bits, div) \
|
#define DEFINE_REMAP(ws, bits) \
|
||||||
static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
|
static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
|
||||||
{ \
|
{ \
|
||||||
ThreadData *td = (ThreadData*)arg; \
|
ThreadData *td = (ThreadData*)arg; \
|
||||||
@ -314,48 +207,85 @@ static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jo
|
|||||||
const AVFrame *in = td->in; \
|
const AVFrame *in = td->in; \
|
||||||
AVFrame *out = td->out; \
|
AVFrame *out = td->out; \
|
||||||
\
|
\
|
||||||
int plane, x, y, i, j; \
|
for (int plane = 0; plane < s->nb_planes; plane++) { \
|
||||||
\
|
const int in_linesize = in->linesize[plane]; \
|
||||||
for (plane = 0; plane < s->nb_planes; plane++) { \
|
const int out_linesize = out->linesize[plane]; \
|
||||||
const int in_linesize = in->linesize[plane] / div; \
|
const uint8_t *src = in->data[plane]; \
|
||||||
const int out_linesize = out->linesize[plane] / div; \
|
uint8_t *dst = out->data[plane]; \
|
||||||
const uint##bits##_t *src = (const uint##bits##_t *)in->data[plane]; \
|
|
||||||
uint##bits##_t *dst = (uint##bits##_t *)out->data[plane]; \
|
|
||||||
const int width = s->planewidth[plane]; \
|
const int width = s->planewidth[plane]; \
|
||||||
const int height = s->planeheight[plane]; \
|
const int height = s->planeheight[plane]; \
|
||||||
\
|
\
|
||||||
const int slice_start = (height * jobnr ) / nb_jobs; \
|
const int slice_start = (height * jobnr ) / nb_jobs; \
|
||||||
const int slice_end = (height * (jobnr + 1)) / nb_jobs; \
|
const int slice_end = (height * (jobnr + 1)) / nb_jobs; \
|
||||||
\
|
\
|
||||||
for (y = slice_start; y < slice_end; y++) { \
|
for (int y = slice_start; y < slice_end; y++) { \
|
||||||
uint##bits##_t *d = dst + y * out_linesize; \
|
|
||||||
const uint16_t *u = s->u[plane] + y * width * ws * ws; \
|
const uint16_t *u = s->u[plane] + y * width * ws * ws; \
|
||||||
const uint16_t *v = s->v[plane] + y * width * ws * ws; \
|
const uint16_t *v = s->v[plane] + y * width * ws * ws; \
|
||||||
const int16_t *ker = s->ker[plane] + y * width * ws * ws; \
|
const int16_t *ker = s->ker[plane] + y * width * ws * ws; \
|
||||||
for (x = 0; x < width; x++) { \
|
|
||||||
const uint16_t *uu = u + x * ws * ws; \
|
|
||||||
const uint16_t *vv = v + x * ws * ws; \
|
|
||||||
const int16_t *kker = ker + x * ws * ws; \
|
|
||||||
int tmp = 0; \
|
|
||||||
\
|
\
|
||||||
for (i = 0; i < ws; i++) { \
|
s->remap_line(dst + y * out_linesize, width, src, in_linesize, u, v, ker); \
|
||||||
for (j = 0; j < ws; j++) { \
|
|
||||||
tmp += kker[i * ws + j] * src[vv[i * ws + j] * in_linesize + uu[i * ws + j]]; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
*d++ = av_clip_uint##bits(tmp >> (15 - ws)); \
|
|
||||||
} \
|
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
return 0; \
|
return 0; \
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_REMAP(2, 8, 1)
|
DEFINE_REMAP(1, 8)
|
||||||
DEFINE_REMAP(4, 8, 1)
|
DEFINE_REMAP(2, 8)
|
||||||
DEFINE_REMAP(2, 16, 2)
|
DEFINE_REMAP(4, 8)
|
||||||
DEFINE_REMAP(4, 16, 2)
|
DEFINE_REMAP(1, 16)
|
||||||
|
DEFINE_REMAP(2, 16)
|
||||||
|
DEFINE_REMAP(4, 16)
|
||||||
|
|
||||||
|
/**
 * Generate the C line-remapping function remap<ws>_<bits>bit_line_c().
 *
 * For each of the `width` output pixels the generated function reads
 * ws * ws consecutive entries from u, v and ker, accumulates
 * ker[k] * sample(row v[k], column u[k]) into an int, shifts the sum right
 * by 14 and clips it to the unsigned <bits>-bit range. The shift matches the
 * 16384 (1 << 14) scale used when the kernels are generated.
 *
 * @param ws   size of interpolation window
 * @param bits sample storage width in bits (selects uint<bits>_t and av_clip_uint<bits>)
 * @param div  number of bytes per sample; converts in_linesize from bytes to samples
 */
#define DEFINE_REMAP_LINE(ws, bits, div)                                                            \
static void remap##ws##_##bits##bit_line_c(uint8_t *dst, int width, const uint8_t *src,             \
                                           ptrdiff_t in_linesize,                                   \
                                           const uint16_t *u, const uint16_t *v, const int16_t *ker) \
{                                                                                                   \
    const uint##bits##_t *s = (const uint##bits##_t *)src;                                          \
    uint##bits##_t *d = (uint##bits##_t *)dst;                                                      \
                                                                                                    \
    in_linesize /= div;                                                                             \
                                                                                                    \
    for (int x = 0; x < width; x++) {                                                               \
        const uint16_t *uu = u + x * ws * ws;                                                       \
        const uint16_t *vv = v + x * ws * ws;                                                       \
        const int16_t *kker = ker + x * ws * ws;                                                    \
        int tmp = 0;                                                                                \
                                                                                                    \
        for (int i = 0; i < ws; i++) {                                                              \
            for (int j = 0; j < ws; j++) {                                                          \
                tmp += kker[i * ws + j] * s[vv[i * ws + j] * in_linesize + uu[i * ws + j]];         \
            }                                                                                       \
        }                                                                                           \
                                                                                                    \
        d[x] = av_clip_uint##bits(tmp >> 14);                                                       \
    }                                                                                               \
}
|
||||||
|
|
||||||
|
DEFINE_REMAP_LINE(2, 8, 1)
|
||||||
|
DEFINE_REMAP_LINE(4, 8, 1)
|
||||||
|
DEFINE_REMAP_LINE(2, 16, 2)
|
||||||
|
DEFINE_REMAP_LINE(4, 16, 2)
|
||||||
|
|
||||||
|
/**
 * Select the line-remapping routine for the configured interpolation method.
 *
 * The C routine is chosen from s->interp and the sample depth (the 8-bit
 * variant for depth <= 8, the 16-bit variant otherwise). On x86-64 builds
 * ff_v360_init_x86() is then given the chance to replace it.
 *
 * @param s     filter context; s->interp is read, s->remap_line is written
 * @param depth sample depth in bits
 */
void ff_v360_init(V360Context *s, int depth)
{
    switch (s->interp) {
    case NEAREST:
        s->remap_line = depth <= 8 ? remap1_8bit_line_c : remap1_16bit_line_c;
        break;
    case BILINEAR:
        s->remap_line = depth <= 8 ? remap2_8bit_line_c : remap2_16bit_line_c;
        break;
    case BICUBIC:
    case LANCZOS:
        /* Both methods use a 4x4 window and share the remap4 routines. */
        s->remap_line = depth <= 8 ? remap4_8bit_line_c : remap4_16bit_line_c;
        break;
    }

    if (ARCH_X86_64)
        ff_v360_init_x86(s, depth);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Save nearest pixel coordinates for remapping.
|
* Save nearest pixel coordinates for remapping.
|
||||||
@ -399,10 +329,10 @@ static void bilinear_kernel(float du, float dv, const XYRemap *r_tmp,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ker[0] = (1.f - du) * (1.f - dv) * 8192;
|
ker[0] = (1.f - du) * (1.f - dv) * 16384;
|
||||||
ker[1] = du * (1.f - dv) * 8192;
|
ker[1] = du * (1.f - dv) * 16384;
|
||||||
ker[2] = (1.f - du) * dv * 8192;
|
ker[2] = (1.f - du) * dv * 16384;
|
||||||
ker[3] = du * dv * 8192;
|
ker[3] = du * dv * 16384;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -446,7 +376,7 @@ static void bicubic_kernel(float du, float dv, const XYRemap *r_tmp,
|
|||||||
for (j = 0; j < 4; j++) {
|
for (j = 0; j < 4; j++) {
|
||||||
u[i * 4 + j] = r_tmp->u[i][j];
|
u[i * 4 + j] = r_tmp->u[i][j];
|
||||||
v[i * 4 + j] = r_tmp->v[i][j];
|
v[i * 4 + j] = r_tmp->v[i][j];
|
||||||
ker[i * 4 + j] = du_coeffs[j] * dv_coeffs[i] * 2048;
|
ker[i * 4 + j] = du_coeffs[j] * dv_coeffs[i] * 16384;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -501,7 +431,7 @@ static void lanczos_kernel(float du, float dv, const XYRemap *r_tmp,
|
|||||||
for (j = 0; j < 4; j++) {
|
for (j = 0; j < 4; j++) {
|
||||||
u[i * 4 + j] = r_tmp->u[i][j];
|
u[i * 4 + j] = r_tmp->u[i][j];
|
||||||
v[i * 4 + j] = r_tmp->v[i][j];
|
v[i * 4 + j] = r_tmp->v[i][j];
|
||||||
ker[i * 4 + j] = du_coeffs[j] * dv_coeffs[i] * 2048;
|
ker[i * 4 + j] = du_coeffs[j] * dv_coeffs[i] * 16384;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2038,6 +1968,8 @@ static int config_output(AVFilterLink *outlink)
|
|||||||
av_assert0(0);
|
av_assert0(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ff_v360_init(s, depth);
|
||||||
|
|
||||||
switch (s->in) {
|
switch (s->in) {
|
||||||
case EQUIRECTANGULAR:
|
case EQUIRECTANGULAR:
|
||||||
in_transform = xyz_to_equirect;
|
in_transform = xyz_to_equirect;
|
||||||
|
@ -31,6 +31,7 @@ OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend_init.o
|
|||||||
OBJS-$(CONFIG_THRESHOLD_FILTER) += x86/vf_threshold_init.o
|
OBJS-$(CONFIG_THRESHOLD_FILTER) += x86/vf_threshold_init.o
|
||||||
OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o
|
OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o
|
||||||
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
|
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
|
||||||
|
OBJS-$(CONFIG_V360_FILTER) += x86/vf_v360_init.o
|
||||||
OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif_init.o
|
OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif_init.o
|
||||||
OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
|
OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
|
||||||
|
|
||||||
@ -66,5 +67,6 @@ X86ASM-OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend.o
|
|||||||
X86ASM-OBJS-$(CONFIG_THRESHOLD_FILTER) += x86/vf_threshold.o
|
X86ASM-OBJS-$(CONFIG_THRESHOLD_FILTER) += x86/vf_threshold.o
|
||||||
X86ASM-OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_interlace.o
|
X86ASM-OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_interlace.o
|
||||||
X86ASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
|
X86ASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
|
||||||
|
X86ASM-OBJS-$(CONFIG_V360_FILTER) += x86/vf_v360.o
|
||||||
X86ASM-OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif.o
|
X86ASM-OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif.o
|
||||||
X86ASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
|
X86ASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
|
||||||
|
142
libavfilter/x86/vf_v360.asm
Normal file
142
libavfilter/x86/vf_v360.asm
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
;*****************************************************************************
|
||||||
|
;* x86-optimized functions for v360 filter
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
SECTION_RODATA
|
||||||
|
|
||||||
|
pb_mask: db 0,4,8,12,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
|
||||||
|
pd_255: times 4 dd 255
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
; void ff_remap{1,2,4}_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
|
||||||
|
; const uint16_t *u, const uint16_t *v, const int16_t *ker);
|
||||||
|
|
||||||
|
; Nearest-neighbour remap of one 8-bit line, 8 output pixels per iteration:
;     dst[x] = src[v[x] * in_linesize + u[x]]
; The 7th C argument (ker) is not loaded: only 6 arguments are requested.
; NOTE(review): the loop always handles a full group of 8 pixels, so for a
; width that is not a multiple of 8 it reads u/v and writes dst past `width`;
; each gather lane also reads 4 bytes. Presumably the buffers are padded — confirm.
INIT_YMM avx2
cglobal remap1_8bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
    movsxdifnidn widthq, widthd
    xor             xq, xq
    movd           xm0, in_linesized
    pcmpeqw         m4, m4              ; all-ones: gather mask template
    VBROADCASTI128  m3, [pb_mask]       ; picks byte 0 of each dword in a lane
    vpbroadcastd    m0, xm0             ; in_linesize in every dword

    .loop:
        ; u and v are uint16_t, so zero-extend them (the C code does the same
        ; through integer promotion); sign extension would turn coordinates
        ; >= 32768 into negative offsets.
        pmovzxwd         m1, [vq + xq * 2]
        pmovzxwd         m2, [uq + xq * 2]

        pmulld           m1, m0         ; v * in_linesize
        paddd            m1, m2         ; + u  -> byte offset into src
        mova             m2, m4         ; the gather clears its mask, so reload it
        vpgatherdd       m5, [srcq + m1], m2
        pshufb           m1, m5, m3     ; pack the low byte of each gathered dword
        vextracti128    xm2, m1, 1
        movd      [dstq+xq], xm1        ; pixels 0..3
        movd    [dstq+xq+4], xm2        ; pixels 4..7

        add   xq, mmsize / 4
        cmp   xq, widthq
        jl .loop
    RET
|
||||||
|
|
||||||
|
; 2x2-window (4-tap) remap of one 8-bit line, 2 output pixels per iteration.
; Each pixel owns 4 consecutive entries in u, v and ker (8 bytes each, hence
; the xq * 8 addressing); one pixel is processed per 128-bit lane:
;     dst[x] = (sum over 4 taps of ker[k] * src[v[k] * in_linesize + u[k]]) >> 14
; NOTE(review): the result byte is stored without saturation, and an odd
; width makes the last iteration process one pixel past `width` — confirm
; that the callers' buffers and kernels make this safe.
INIT_YMM avx2
cglobal remap2_8bit_line, 7, 8, 8, dst, width, src, in_linesize, u, v, ker, x
    movsxdifnidn widthq, widthd
    xor             xq, xq
    movd           xm0, in_linesized
    pcmpeqw         m7, m7              ; all-ones: gather mask template
    vpbroadcastd    m0, xm0             ; in_linesize in every dword
    vpbroadcastd    m6, [pd_255]        ; keeps the low byte of a gathered dword

    .loop:
        pmovsxwd   m1, [kerq + xq * 8]  ; weights are int16_t: sign-extend
        ; u and v are uint16_t, so zero-extend them (the C code does the same
        ; through integer promotion); sign extension would turn coordinates
        ; >= 32768 into negative offsets.
        pmovzxwd   m2, [vq + xq * 8]
        pmovzxwd   m3, [uq + xq * 8]

        pmulld          m4, m2, m0      ; v * in_linesize
        paddd           m4, m3          ; + u  -> byte offset into src
        mova            m3, m7          ; the gather clears its mask, so reload it
        vpgatherdd      m2, [srcq + m4], m3
        pand            m2, m6          ; isolate the addressed 8-bit sample
        pmulld          m2, m1          ; sample * weight
        phaddd          m2, m2          ; two horizontal adds: sum of the 4 taps
        phaddd          m1, m2, m2      ; of each lane
        psrld           m1, m1, 0xe     ; drop the 14 fractional bits
        vextracti128   xm2, m1, 1

        pextrb   [dstq+xq], xm1, 0      ; pixel from the low lane
        pextrb [dstq+xq+1], xm2, 0      ; pixel from the high lane

        add   xq, mmsize / 16
        cmp   xq, widthq
        jl .loop
    RET
|
||||||
|
|
||||||
|
; 4x4-window (16-tap) remap of one 8-bit line, 1 output pixel per iteration.
; Each pixel owns 16 consecutive entries in u, v and ker (32 bytes each);
; yq is the running byte offset into those tables, xq the output pixel index:
;     dst[x] = (sum over 16 taps of ker[k] * src[v[k] * in_linesize + u[k]]) >> 14
; Uses registers m8-m10, which only exist on x86-64.
INIT_YMM avx2
cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x, y
    movsxdifnidn widthq, widthd
    xor             yq, yq
    xor             xq, xq
    movd           xm0, in_linesized
    pcmpeqw         m7, m7              ; all-ones: gather mask template
    vpbroadcastd    m0, xm0             ; in_linesize in every dword
    vpbroadcastd    m6, [pd_255]        ; keeps the low byte of a gathered dword

    .loop:
        pmovsxwd   m1, [kerq + yq]      ; weights are int16_t: sign-extend
        pmovsxwd   m5, [kerq + yq + 16]
        ; u and v are uint16_t, so zero-extend them (the C code does the same
        ; through integer promotion); sign extension would turn coordinates
        ; >= 32768 into negative offsets.
        pmovzxwd   m2, [vq + yq]        ; taps 0..7
        pmovzxwd   m8, [vq + yq + 16]   ; taps 8..15
        pmovzxwd   m3, [uq + yq]
        pmovzxwd   m9, [uq + yq + 16]

        pmulld          m4, m2, m0      ; v * in_linesize
        pmulld          m10, m8, m0
        paddd           m4, m3          ; + u  -> byte offsets into src
        paddd           m10, m9
        mova            m3, m7          ; the gather clears its mask, so reload it
        vpgatherdd      m2, [srcq + m4], m3
        mova            m3, m7
        vpgatherdd      m4, [srcq + m10], m3
        pand            m2, m6          ; isolate the addressed 8-bit samples
        pand            m4, m6
        pmulld          m2, m1          ; sample * weight
        pmulld          m4, m5

        paddd           m2, m4          ; 16 products -> 8 partial sums
        vextracti128   xm1, m2, 1
        paddd           m1, m2          ; fold the high lane onto the low lane
        phaddd          m1, m1          ; two horizontal adds leave the total
        phaddd          m1, m1          ; in dword 0
        psrld           m1, m1, 0xe     ; drop the 14 fractional bits
        packuswb        m1, m1          ; narrow words to bytes with unsigned saturation

        pextrb   [dstq+xq], xm1, 0

        add   xq, 1
        add   yq, 32
        cmp   xq, widthq
        jl .loop
    RET
|
||||||
|
|
||||||
|
%endif
|
50
libavfilter/x86/vf_v360_init.c
Normal file
50
libavfilter/x86/vf_v360_init.c
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavfilter/v360.h"
|
||||||
|
|
||||||
|
void ff_remap1_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
|
||||||
|
const uint16_t *u, const uint16_t *v, const int16_t *ker);
|
||||||
|
|
||||||
|
void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
|
||||||
|
const uint16_t *u, const uint16_t *v, const int16_t *ker);
|
||||||
|
|
||||||
|
void ff_remap4_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
|
||||||
|
const uint16_t *u, const uint16_t *v, const int16_t *ker);
|
||||||
|
|
||||||
|
/**
 * Install the AVX2 line-remapping routines where they apply.
 *
 * s->remap_line is replaced only on x86-64 builds, when the CPU reports
 * fast external AVX2 and the sample depth is at most 8 bits; otherwise it is
 * left as the caller set it.
 *
 * @param s     filter context; s->interp is read, s->remap_line may be written
 * @param depth sample depth in bits
 */
av_cold void ff_v360_init_x86(V360Context *s, int depth)
{
#if ARCH_X86_64
    int cpu_flags = av_get_cpu_flags();

    /* Only 8-bit AVX2 kernels are declared in this file. */
    if (!EXTERNAL_AVX2_FAST(cpu_flags) || depth > 8)
        return;

    switch (s->interp) {
    case NEAREST:
        s->remap_line = ff_remap1_8bit_line_avx2;
        break;
    case BILINEAR:
        s->remap_line = ff_remap2_8bit_line_avx2;
        break;
    case BICUBIC:
    case LANCZOS:
        /* Both methods use a 4x4 window and share one kernel. */
        s->remap_line = ff_remap4_8bit_line_avx2;
        break;
    default:
        break;
    }
#endif
}
|
Loading…
Reference in New Issue
Block a user