mirror of https://git.ffmpeg.org/ffmpeg.git
avcodec/mips: Improve vp9 mc msa functions
Load the specific destination bytes instead of MSA load and pack.
Signed-off-by: Kaustubh Raste <kaustubh.raste@imgtec.com>
Reviewed-by: Manojkumar Bhosale <Manojkumar.Bhosale@imgtec.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
c75b23cbea
commit
9b2c3c406f
|
@ -1479,7 +1479,8 @@ static void avc_luma_hz_and_aver_dst_8x8_msa(const uint8_t *src,
|
|||
plus20b, res0, res1, res2, res3);
|
||||
SRARI_H4_SH(res0, res1, res2, res3, 5);
|
||||
SAT_SH4_SH(res0, res1, res2, res3, 7);
|
||||
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1, dst2, dst3,
|
||||
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
|
||||
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1,
|
||||
dst, dst_stride);
|
||||
|
||||
dst += (4 * dst_stride);
|
||||
|
@ -1825,8 +1826,8 @@ static void avc_luma_vt_and_aver_dst_8x8_msa(const uint8_t *src,
|
|||
SRARI_H4_SH(out0, out1, out2, out3, 5);
|
||||
SAT_SH4_SH(out0, out1, out2, out3, 7);
|
||||
LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
|
||||
|
||||
CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
|
||||
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
|
||||
CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1,
|
||||
dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
|
@ -2229,7 +2230,8 @@ static void avc_luma_mid_and_aver_dst_8w_msa(const uint8_t *src,
|
|||
res3 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
|
||||
hz_out6, hz_out7, hz_out8);
|
||||
LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
|
||||
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1, dst2, dst3,
|
||||
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
|
||||
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1,
|
||||
dst, dst_stride);
|
||||
|
||||
dst += (4 * dst_stride);
|
||||
|
@ -2518,8 +2520,8 @@ static void avc_luma_midv_qrt_and_aver_dst_8w_msa(const uint8_t *src,
|
|||
res1 = __msa_aver_s_h(res2, res3);
|
||||
res2 = __msa_aver_s_h(res4, res5);
|
||||
res3 = __msa_aver_s_h(res6, res7);
|
||||
|
||||
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1, dst2, dst3,
|
||||
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
|
||||
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1,
|
||||
dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
|
@ -2676,7 +2678,8 @@ static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x,
|
|||
out3 = __msa_srari_h((hz_out3 + vert_out3), 1);
|
||||
|
||||
SAT_SH4_SH(out0, out1, out2, out3, 7);
|
||||
CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
|
||||
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
|
||||
CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1,
|
||||
dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2773,20 +2773,18 @@
|
|||
|
||||
/* Description : Converts inputs to unsigned bytes, interleave, average & store
|
||||
as 8x4 unsigned byte block
|
||||
Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, dst2, dst3,
|
||||
pdst, stride
|
||||
Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, pdst, stride
|
||||
*/
|
||||
#define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, \
|
||||
dst0, dst1, dst2, dst3, pdst, stride) \
|
||||
{ \
|
||||
v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
|
||||
uint8_t *pdst_m = (uint8_t *) (pdst); \
|
||||
\
|
||||
tmp0_m = PCKEV_XORI128_UB(in0, in1); \
|
||||
tmp1_m = PCKEV_XORI128_UB(in2, in3); \
|
||||
ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
|
||||
AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
|
||||
ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
|
||||
#define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, \
|
||||
dst0, dst1, pdst, stride) \
|
||||
{ \
|
||||
v16u8 tmp0_m, tmp1_m; \
|
||||
uint8_t *pdst_m = (uint8_t *) (pdst); \
|
||||
\
|
||||
tmp0_m = PCKEV_XORI128_UB(in0, in1); \
|
||||
tmp1_m = PCKEV_XORI128_UB(in2, in3); \
|
||||
AVER_UB2_UB(tmp0_m, dst0, tmp1_m, dst1, tmp0_m, tmp1_m); \
|
||||
ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
|
||||
}
|
||||
|
||||
/* Description : Pack even byte elements, extract 0 & 2 index words from pair
|
||||
|
|
Loading…
Reference in New Issue