x86/hevc_add_res: merge last remaining changes from 3d65359832

See https://lists.libav.org/pipermail/libav-devel/2016-October/079829.html
This commit is contained in:
James Almer 2017-03-31 20:42:16 -03:00
parent b62a87591e
commit 6171f178e7
2 changed files with 14 additions and 31 deletions

View File

@@ -97,7 +97,6 @@ Stuff that didn't reach the codebase:
- VAAPI VP8 decode hwaccel (currently under review: http://ffmpeg.org/pipermail/ffmpeg-devel/2017-February/thread.html#207348)
- Removal of the custom atomic API (5cc0057f49, see http://ffmpeg.org/pipermail/ffmpeg-devel/2017-March/209003.html)
- Use the new bitstream filter for extracting extradata (8e2ea69135 and 096a8effa3, see https://ffmpeg.org/pipermail/ffmpeg-devel/2017-March/209068.html)
- ADD_RES_MMX_4_8 in libavcodec/x86/hevc_add_res.asm probably needs updating (see 589880710)
- Read aac_adtstoasc extradata updates from packet side data on Matroska once mov and the bsf in question are fixed (See 13a211e632 and 5ef1959080)
Collateral damage that needs work locally:

View File

@@ -28,25 +28,23 @@ cextern pw_1023
; the add_res macros and functions were largely inspired by h264_idct.asm from the x264 project
; NOTE(review): this span comes from a diff hunk ("-28,25 +28,23") whose +/-
; markers are missing, so the pre-change and post-change bodies of the macro
; are interleaved below; read top to bottom it is NOT one runnable sequence.
; The [old] / [new] / [ctx] tags are inferred from register dataflow and from
; the hunk's line counts -- TODO confirm against the commit itself.
;
; Both variants follow the same scheme: read 16-bit words from [r1] and
; [r1+8], build a negated copy (0 - x), pack both copies to bytes with
; unsigned saturation (so negative words become 0), then saturating-add the
; first and saturating-subtract the second against bytes read from [r0] and
; [r0+r2], and store the result back to [r0] and [r0+r2].
; Presumably r0 = destination pixels, r1 = residuals, r2 = row stride
; (guessed from the name hevc_add_residual_4_8; verify against the caller).
; mova/movh are macros defined outside this view; their widths are not shown.
%macro ADD_RES_MMX_4_8 0
mova m2, [r1]                   ; [old] m2 = words at [r1]
mova m4, [r1+8]                 ; [old] m4 = words at [r1+8]
mova m0, [r1]                   ; [new] m0 = words at [r1]
mova m2, [r1+8]                 ; [new] m2 = words at [r1+8]
pxor m1, m1                     ; [new] m1 = 0
pxor m3, m3                     ; [ctx] m3 = 0
psubw m1, m0                    ; [new] m1 = -m0, per 16-bit word
psubw m3, m2                    ; [ctx] m3 = -m2, per 16-bit word
packuswb m2, m2                 ; [old] m2 words -> unsigned-saturated bytes
packuswb m3, m3                 ; [old] same for the negated copy in m3
pxor m5, m5                     ; [old] m5 = 0
psubw m5, m4                    ; [old] m5 = -m4, per 16-bit word
packuswb m4, m4                 ; [old] m4 words -> unsigned-saturated bytes
packuswb m5, m5                 ; [old] same for the negated copy in m5
packuswb m0, m2                 ; [new] m0 = saturated bytes of m0 (low half) and m2 (high half)
packuswb m1, m3                 ; [new] m1 = saturated bytes of the negated copies m1, m3
movh m0, [r0]                   ; [old] m0 = bytes at [r0]
movh m1, [r0+r2]                ; [old] m1 = bytes at [r0+r2]
movd m2, [r0]                   ; [new] m2 = 4 bytes at [r0]
movd m3, [r0+r2]                ; [new] m3 = 4 bytes at [r0+r2]
punpckldq m2, m3                ; [new] m2 = [r0] dword in low half, [r0+r2] dword in high half
paddusb m0, m2                  ; [ctx] m0 += m2, unsigned-saturating per byte
paddusb m1, m4                  ; [old] m1 += m4, unsigned-saturating per byte
psubusb m0, m3                  ; [old] m0 -= m3 (negated part), unsigned-saturating
psubusb m1, m5                  ; [old] m1 -= m5 (negated part), unsigned-saturating
movh [r0], m0                   ; [old] store to [r0]
movh [r0+r2], m1                ; [old] store to [r0+r2]
psubusb m0, m1                  ; [new] m0 -= m1 (negated part), unsigned-saturating
movd [r0], m0                   ; [new] store low 4 bytes to [r0]
psrlq m0, 32                    ; [new] shift the upper 4 bytes down into the low dword
movd [r0+r2], m0                ; [new] store them to [r0+r2]
%endmacro
@@ -95,15 +93,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
vinserti128 m2, m2, [r1+%1+32], 1
vinserti128 m6, m6, [r1+%1+48], 1
%endif
%if cpuflag(avx)
psubw m1, m0, m2
psubw m5, m0, m6
%else
mova m1, m0
mova m5, m0
psubw m1, m2
psubw m5, m6
%endif
packuswb m2, m6
packuswb m1, m5
@@ -113,15 +104,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
vinserti128 m4, m4, [r1+%1+96 ], 1
vinserti128 m6, m6, [r1+%1+112], 1
%endif
%if cpuflag(avx)
psubw m3, m0, m4
psubw m5, m0, m6
%else
mova m3, m0
mova m5, m0
psubw m3, m4
psubw m5, m6
%endif
packuswb m4, m6
packuswb m3, m5
@@ -192,7 +176,7 @@ cglobal hevc_add_residual_32_8, 3, 5, 7
dec r4d
jg .loop
RET
%endif
%endif ;HAVE_AVX2_EXTERNAL
%macro ADD_RES_SSE_8_10 4
mova m0, [%4]