lavc/aarch64: new optimization for 8-bit hevc_qpel_h and hevc_qpel_uni_w_hv

Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
Logan Lyu 2023-05-28 09:56:51 +08:00 committed by Martin Storsjö
parent 15972cce8c
commit e79686be96
2 changed files with 1102 additions and 0 deletions

View File

@ -145,6 +145,13 @@ void ff_hevc_put_hevc_qpel_bi_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, co
void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
void ff_hevc_put_hevc_##fn##64_8_neon##ext args; \
/*
 * Declare the prototypes of the five functions named
 * ff_hevc_put_hevc_<fn><N>_8_neon<ext>, with size suffix N = 4, 8, 16, 32
 * and 64.
 *
 *   fn   - name fragment pasted between "ff_hevc_put_hevc_" and the size suffix
 *   args - parenthesised parameter list shared by all five prototypes
 *   ext  - suffix pasted after "_8_neon"
 *
 * Every generated declaration is terminated by its own ';'.  The last line
 * of the definition carries no line continuation, so the source line that
 * follows the macro is never spliced into its body.
 */
#define NEON8_FNPROTO_PARTIAL_5(fn, args, ext) \
    void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##64_8_neon##ext args;
NEON8_FNPROTO(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
const uint8_t *_src, ptrdiff_t _srcstride,
@ -156,11 +163,20 @@ NEON8_FNPROTO_PARTIAL_4(qpel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
int height, int denom, int wx, int ox,
intptr_t mx, intptr_t my, int width),);
/*
 * Prototypes for the qpel_h functions carrying the _i8mm suffix.  Unlike the
 * uni_w variants below, the destination is an int16_t pointer and there is no
 * destination-stride parameter.  The set of size suffixes declared is decided
 * by NEON8_FNPROTO.
 */
NEON8_FNPROTO(qpel_h, (int16_t *dst,
const uint8_t *_src, ptrdiff_t _srcstride,
int height, intptr_t mx, intptr_t my, int width), _i8mm);
/*
 * Prototypes for the qpel_uni_w_h functions carrying the _i8mm suffix.  They
 * take a uint8_t destination with its stride plus the denom/wx/ox arguments.
 */
NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
const uint8_t *_src, ptrdiff_t _srcstride,
int height, int denom, int wx, int ox,
intptr_t mx, intptr_t my, int width), _i8mm);
/*
 * Prototypes for the qpel_uni_w_hv functions carrying the _i8mm suffix, with
 * the same parameter list as qpel_uni_w_h.  Declared through
 * NEON8_FNPROTO_PARTIAL_5, i.e. only for the size suffixes 4, 8, 16, 32
 * and 64.
 */
NEON8_FNPROTO_PARTIAL_5(qpel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
const uint8_t *_src, ptrdiff_t _srcstride,
int height, int denom, int wx, int ox,
intptr_t mx, intptr_t my, int width), _i8mm);
#define NEON8_FNASSIGN(member, v, h, fn, ext) \
member[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext; \
@ -181,6 +197,12 @@ NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
member[8][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
member[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext;
/*
 * Store the five symbols ff_hevc_put_hevc_<fn><N>_8_neon<ext> into a
 * three-dimensional table:
 *
 *   member[1][v][h] <- N = 4
 *   member[3][v][h] <- N = 8
 *   member[5][v][h] <- N = 16
 *   member[7][v][h] <- N = 32
 *   member[9][v][h] <- N = 64
 *
 * No other first-dimension index is written.
 *
 *   member - table to fill (indexed as member[idx][v][h])
 *   v, h   - second and third index used for every store
 *   fn     - name fragment pasted between "ff_hevc_put_hevc_" and the size
 *   ext    - suffix pasted after "_8_neon"
 *
 * The body is wrapped in do { } while (0) so that all five stores behave as
 * one statement; without it, using the macro under an unbraced if/else would
 * guard only the first assignment.  Invoke it followed by a ';'.
 */
#define NEON8_FNASSIGN_PARTIAL_5(member, v, h, fn, ext) \
    do { \
        member[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext; \
        member[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext; \
        member[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext; \
        member[7][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext; \
        member[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
    } while (0)
av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
{
@ -247,6 +269,8 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
if (have_i8mm(cpu_flags)) {
NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 1, qpel_uni_w_h, _i8mm);
NEON8_FNASSIGN(c->put_hevc_qpel, 0, 1, qpel_h, _i8mm);
NEON8_FNASSIGN_PARTIAL_5(c->put_hevc_qpel_uni_w, 1, 1, qpel_uni_w_hv, _i8mm);
}
}

File diff suppressed because it is too large Load Diff