mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-27 09:52:17 +00:00
avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni mc epel functions
This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC uni mc epel functions. Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h. Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
c96c73b0b0
commit
aef34ab950
File diff suppressed because it is too large
Load Diff
@ -129,6 +129,36 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
|
||||
c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_8_msa;
|
||||
c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_8_msa;
|
||||
|
||||
c->put_hevc_epel_uni[3][0][0] = ff_hevc_put_hevc_uni_pel_pixels8_8_msa;
|
||||
c->put_hevc_epel_uni[4][0][0] = ff_hevc_put_hevc_uni_pel_pixels12_8_msa;
|
||||
c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels16_8_msa;
|
||||
c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels24_8_msa;
|
||||
c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_msa;
|
||||
|
||||
c->put_hevc_epel_uni[1][0][1] = ff_hevc_put_hevc_uni_epel_h4_8_msa;
|
||||
c->put_hevc_epel_uni[2][0][1] = ff_hevc_put_hevc_uni_epel_h6_8_msa;
|
||||
c->put_hevc_epel_uni[3][0][1] = ff_hevc_put_hevc_uni_epel_h8_8_msa;
|
||||
c->put_hevc_epel_uni[4][0][1] = ff_hevc_put_hevc_uni_epel_h12_8_msa;
|
||||
c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_8_msa;
|
||||
c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_8_msa;
|
||||
c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_msa;
|
||||
|
||||
c->put_hevc_epel_uni[1][1][0] = ff_hevc_put_hevc_uni_epel_v4_8_msa;
|
||||
c->put_hevc_epel_uni[2][1][0] = ff_hevc_put_hevc_uni_epel_v6_8_msa;
|
||||
c->put_hevc_epel_uni[3][1][0] = ff_hevc_put_hevc_uni_epel_v8_8_msa;
|
||||
c->put_hevc_epel_uni[4][1][0] = ff_hevc_put_hevc_uni_epel_v12_8_msa;
|
||||
c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_8_msa;
|
||||
c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_8_msa;
|
||||
c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_msa;
|
||||
|
||||
c->put_hevc_epel_uni[1][1][1] = ff_hevc_put_hevc_uni_epel_hv4_8_msa;
|
||||
c->put_hevc_epel_uni[2][1][1] = ff_hevc_put_hevc_uni_epel_hv6_8_msa;
|
||||
c->put_hevc_epel_uni[3][1][1] = ff_hevc_put_hevc_uni_epel_hv8_8_msa;
|
||||
c->put_hevc_epel_uni[4][1][1] = ff_hevc_put_hevc_uni_epel_hv12_8_msa;
|
||||
c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_8_msa;
|
||||
c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_8_msa;
|
||||
c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_msa;
|
||||
|
||||
c->put_hevc_qpel_uni_w[1][0][0] =
|
||||
ff_hevc_put_hevc_uni_w_pel_pixels4_8_msa;
|
||||
c->put_hevc_qpel_uni_w[3][0][0] =
|
||||
|
@ -145,6 +145,36 @@ UNI_MC(qpel, hv, 32);
|
||||
UNI_MC(qpel, hv, 48);
|
||||
UNI_MC(qpel, hv, 64);
|
||||
|
||||
UNI_MC(epel, h, 4);
|
||||
UNI_MC(epel, h, 6);
|
||||
UNI_MC(epel, h, 8);
|
||||
UNI_MC(epel, h, 12);
|
||||
UNI_MC(epel, h, 16);
|
||||
UNI_MC(epel, h, 24);
|
||||
UNI_MC(epel, h, 32);
|
||||
UNI_MC(epel, h, 48);
|
||||
UNI_MC(epel, h, 64);
|
||||
|
||||
UNI_MC(epel, v, 4);
|
||||
UNI_MC(epel, v, 6);
|
||||
UNI_MC(epel, v, 8);
|
||||
UNI_MC(epel, v, 12);
|
||||
UNI_MC(epel, v, 16);
|
||||
UNI_MC(epel, v, 24);
|
||||
UNI_MC(epel, v, 32);
|
||||
UNI_MC(epel, v, 48);
|
||||
UNI_MC(epel, v, 64);
|
||||
|
||||
UNI_MC(epel, hv, 4);
|
||||
UNI_MC(epel, hv, 6);
|
||||
UNI_MC(epel, hv, 8);
|
||||
UNI_MC(epel, hv, 12);
|
||||
UNI_MC(epel, hv, 16);
|
||||
UNI_MC(epel, hv, 24);
|
||||
UNI_MC(epel, hv, 32);
|
||||
UNI_MC(epel, hv, 48);
|
||||
UNI_MC(epel, hv, 64);
|
||||
|
||||
#undef UNI_MC
|
||||
|
||||
#define UNI_W_MC(PEL, DIR, WIDTH) \
|
||||
|
@ -291,6 +291,7 @@
|
||||
LD_B2(RTYPE, (psrc), stride, out0, out1); \
|
||||
out2 = LD_B(RTYPE, (psrc) + 2 * stride); \
|
||||
}
|
||||
#define LD_UB3(...) LD_B3(v16u8, __VA_ARGS__)
|
||||
#define LD_SB3(...) LD_B3(v16i8, __VA_ARGS__)
|
||||
|
||||
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
|
||||
@ -573,6 +574,18 @@
|
||||
SH(out7_m, (pblk_6x4_m + 4)); \
|
||||
}
|
||||
|
||||
/* Description : Store as 8x1 byte block to destination memory from input vector
   Arguments   : Inputs - in, pdst
   Details     : Index 0 double word element from input vector 'in' is copied
                 and stored to destination memory at (pdst).
                 Both arguments are parenthesized in the expansion so that an
                 expression argument is cast / passed on as a whole.
*/
#define ST8x1_UB(in, pdst)                      \
{                                               \
    uint64_t out0_m;                            \
    out0_m = __msa_copy_u_d((v2i64) (in), 0);   \
    SD(out0_m, (pdst));                         \
}
|
||||
|
||||
/* Description : Store as 8x2 byte block to destination memory from input vector
|
||||
Arguments : Inputs - in, pdst, stride
|
||||
Details : Index 0 double word element from input vector 'in' is copied
|
||||
@ -716,6 +729,23 @@
|
||||
}
|
||||
#define SLDI_B4_0_SB(...) SLDI_B4_0(v16i8, __VA_ARGS__)
|
||||
|
||||
/* Description : Immediate number of columns to slide
   Arguments   : Inputs  - in0_0, in0_1, in1_0, in1_1, slide_val
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in0_0' vector are slid into 'in1_0' by
                 number of elements specified by 'slide_val' and the result
                 is written to 'out0'.
                 The 'in0_1' / 'in1_1' pair is processed the same way to
                 produce 'out1'.
                 'slide_val' is forwarded unchanged as the last operand of
                 __msa_sldi_b.
                 Vector inputs are parenthesized before the (v16i8) cast so
                 that expression arguments are cast as a whole.
*/
#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val)  \
{                                                                          \
    out0 = (RTYPE) __msa_sldi_b((v16i8) (in0_0), (v16i8) (in1_0),          \
                                slide_val);                                \
    out1 = (RTYPE) __msa_sldi_b((v16i8) (in0_1), (v16i8) (in1_1),          \
                                slide_val);                                \
}
#define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__)
#define SLDI_B2_SB(...) SLDI_B2(v16i8, __VA_ARGS__)
#define SLDI_B2_SH(...) SLDI_B2(v8i16, __VA_ARGS__)
|
||||
|
||||
|
||||
/* Description : Shuffle byte vector elements as per mask vector
|
||||
Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
|
||||
Outputs - out0, out1
|
||||
@ -1090,6 +1120,16 @@
|
||||
#define ILVR_B2_SB(...) ILVR_B2(v16i8, __VA_ARGS__)
|
||||
#define ILVR_B2_UH(...) ILVR_B2(v8u16, __VA_ARGS__)
|
||||
#define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__)
|
||||
#define ILVR_B2_SW(...) ILVR_B2(v4i32, __VA_ARGS__)
|
||||
|
||||
/* Description : Three-output variant of ILVR_B2
   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5
                 Outputs - out0, out1, out2
                 Return Type - as per RTYPE
   Details     : 'out0' and 'out1' are produced by ILVR_B2 from in0..in3.
                 'out2' is the result of __msa_ilvr_b applied to 'in4' and
                 'in5', cast to RTYPE.
                 NOTE(review): ILVR_B2 is defined outside this excerpt;
                 presumably it pairs (in0, in1) -> out0 and (in2, in3) -> out1
                 the same way - confirm against its definition.
                 'in4' / 'in5' are parenthesized before the (v16i8) cast so
                 that expression arguments are cast as a whole.
*/
#define ILVR_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2)  \
{                                                                       \
    ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1);                     \
    out2 = (RTYPE) __msa_ilvr_b((v16i8) (in4), (v16i8) (in5));          \
}
#define ILVR_B3_UB(...) ILVR_B3(v16u8, __VA_ARGS__)
#define ILVR_B3_UH(...) ILVR_B3(v8u16, __VA_ARGS__)
#define ILVR_B3_SH(...) ILVR_B3(v8i16, __VA_ARGS__)
|
||||
|
||||
#define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
|
||||
out0, out1, out2, out3) \
|
||||
@ -1306,6 +1346,7 @@
|
||||
out0 = (RTYPE) __msa_splati_h((v8i16) in, idx0); \
|
||||
out1 = (RTYPE) __msa_splati_h((v8i16) in, idx1); \
|
||||
}
|
||||
#define SPLATI_H2_SB(...) SPLATI_H2(v16i8, __VA_ARGS__)
|
||||
#define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__)
|
||||
|
||||
#define SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, \
|
||||
@ -1427,7 +1468,9 @@
|
||||
in0 = (RTYPE) __msa_xori_b((v16u8) in0, 128); \
|
||||
in1 = (RTYPE) __msa_xori_b((v16u8) in1, 128); \
|
||||
}
|
||||
#define XORI_B2_128_UB(...) XORI_B2_128(v16u8, __VA_ARGS__)
|
||||
#define XORI_B2_128_SB(...) XORI_B2_128(v16i8, __VA_ARGS__)
|
||||
#define XORI_B2_128_SH(...) XORI_B2_128(v8i16, __VA_ARGS__)
|
||||
|
||||
#define XORI_B3_128(RTYPE, in0, in1, in2) \
|
||||
{ \
|
||||
@ -1628,6 +1671,14 @@
|
||||
#define SRARI_H2_UH(...) SRARI_H2(v8u16, __VA_ARGS__)
|
||||
#define SRARI_H2_SH(...) SRARI_H2(v8i16, __VA_ARGS__)
|
||||
|
||||
/* Description : Four-vector variant of SRARI_H2
   Arguments   : Inputs - in0, in1, in2, in3, shift
                 Return Type - as per RTYPE
   Details     : Invokes SRARI_H2 on the pair (in0, in1) and then on the pair
                 (in2, in3), forwarding the same RTYPE and 'shift' to both.
                 NOTE(review): SRARI_H2 is defined outside this excerpt;
                 presumably it updates its vector arguments in place - confirm.
*/
#define SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \
|
||||
{ \
|
||||
SRARI_H2(RTYPE, in0, in1, shift); \
|
||||
SRARI_H2(RTYPE, in2, in3, shift); \
|
||||
}
|
||||
#define SRARI_H4_UH(...) SRARI_H4(v8u16, __VA_ARGS__)
|
||||
#define SRARI_H4_SH(...) SRARI_H4(v8i16, __VA_ARGS__)
|
||||
|
||||
/* Description : Shift right arithmetic rounded (immediate)
|
||||
Arguments : Inputs - in0, in1, shift
|
||||
Outputs - in0, in1 (in place)
|
||||
|
Loading…
Reference in New Issue
Block a user