From 408bb8556fde971d407d69d47edb350499e7c081 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 10 Oct 2015 20:31:57 -0400 Subject: [PATCH] vp9: refactor itx coefficients and share between 8 and 10/12bpp. --- libavcodec/x86/vp9itxfm.asm | 88 ++++++++++++++++++------------ libavcodec/x86/vp9itxfm_16bpp.asm | 91 +++++++++++++------------------ 2 files changed, 91 insertions(+), 88 deletions(-) diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm index a3e0f863ff..6d5008e334 100644 --- a/libavcodec/x86/vp9itxfm.asm +++ b/libavcodec/x86/vp9itxfm.asm @@ -26,50 +26,70 @@ SECTION_RODATA -pw_11585x2: times 8 dw 23170 -pw_m11585x2: times 8 dw -23170 -pw_m11585_11585: times 4 dw -11585, 11585 -pw_11585_11585: times 8 dw 11585 -pw_m11585_m11585: times 8 dw -11585 - %macro VP9_IDCT_COEFFS 2-3 0 -pw_%1x2: times 8 dw %1*2 -pw_m%1x2: times 8 dw -%1*2 -pw_%2x2: times 8 dw %2*2 -pw_m%2x2: times 8 dw -%2*2 -pw_m%1_%2: times 4 dw -%1, %2 -pw_%2_%1: times 4 dw %2, %1 -pw_m%2_m%1: times 4 dw -%2, -%1 +const pw_m%1_%2 +times 4 dw -%1, %2 +const pw_%2_%1 +times 4 dw %2, %1 + %if %3 == 1 -pw_m%2_%1: times 4 dw -%2, %1 -pw_%1_%2: times 4 dw %1, %2 +const pw_m%2_m%1 +times 4 dw -%2, -%1 +%if %1 != %2 +const pw_m%2_%1 +times 4 dw -%2, %1 +const pw_%1_%2 +times 4 dw %1, %2 +%endif +%endif + +%if %1 < 11585 +pw_m%1x2: times 8 dw -%1*2 +%elif %1 > 11585 +pw_%1x2: times 8 dw %1*2 +%else +const pw_%1x2 +times 8 dw %1*2 +%endif + +%if %2 != %1 +pw_%2x2: times 8 dw %2*2 %endif %endmacro -VP9_IDCT_COEFFS 15137, 6270, 1 -VP9_IDCT_COEFFS 16069, 3196, 1 -VP9_IDCT_COEFFS 9102, 13623, 1 -VP9_IDCT_COEFFS 16305, 1606 -VP9_IDCT_COEFFS 10394, 12665 -VP9_IDCT_COEFFS 14449, 7723 -VP9_IDCT_COEFFS 4756, 15679 VP9_IDCT_COEFFS 16364, 804 -VP9_IDCT_COEFFS 11003, 12140 -VP9_IDCT_COEFFS 14811, 7005 -VP9_IDCT_COEFFS 5520, 15426 +VP9_IDCT_COEFFS 16305, 1606 +VP9_IDCT_COEFFS 16069, 3196, 1 VP9_IDCT_COEFFS 15893, 3981 -VP9_IDCT_COEFFS 8423, 14053 +VP9_IDCT_COEFFS 15137, 6270, 1 +VP9_IDCT_COEFFS 14811, 7005 +VP9_IDCT_COEFFS 14449, 7723 VP9_IDCT_COEFFS 13160, 9760 +VP9_IDCT_COEFFS 11585, 11585, 1 +VP9_IDCT_COEFFS 11003, 12140 +VP9_IDCT_COEFFS 10394, 12665 +VP9_IDCT_COEFFS 9102, 13623, 1 +VP9_IDCT_COEFFS 8423, 14053 +VP9_IDCT_COEFFS 5520, 15426 +VP9_IDCT_COEFFS 4756, 15679 VP9_IDCT_COEFFS 2404, 16207 -pw_5283_13377: times 4 dw 5283, 13377 -pw_9929_13377: times 4 dw 9929, 13377 -pw_15212_m13377: times 4 dw 15212, -13377 -pw_15212_9929: times 4 dw 15212, 9929 -pw_m5283_m15212: times 4 dw -5283, -15212 -pw_13377x2: times 8 dw 13377*2 -pw_m13377_13377: times 4 dw -13377, 13377 -pw_13377_0: times 4 dw 13377, 0 +const pw_5283_13377 +times 4 dw 5283, 13377 +const pw_9929_13377 +times 4 dw 9929, 13377 +const pw_15212_m13377 +times 4 dw 15212, -13377 +const pw_15212_9929 +times 4 dw 15212, 9929 +const pw_m5283_m15212 +times 4 dw -5283, -15212 +const pw_13377x2 +times 8 dw 13377*2 +const pw_m13377_13377 +times 4 dw -13377, 13377 +const pw_13377_0 +times 4 dw 13377, 0 cextern pw_8 cextern pw_16 diff --git a/libavcodec/x86/vp9itxfm_16bpp.asm b/libavcodec/x86/vp9itxfm_16bpp.asm index 3257986d30..346c5147e8 100644 --- a/libavcodec/x86/vp9itxfm_16bpp.asm +++ b/libavcodec/x86/vp9itxfm_16bpp.asm @@ -38,64 +38,47 @@ cextern pd_8192 pd_8: times 4 dd 8 pd_3fff: times 4 dd 0x3fff -; FIXME these should probably be shared between 8bpp and 10/12bpp -pw_m11585_11585: times 4 dw -11585, 11585 -pw_11585_11585: times 8 dw 11585 -pw_m15137_6270: times 4 dw -15137, 6270 -pw_6270_15137: times 4 dw 6270, 15137 -pw_11585x2: times 8 dw 11585*2 +cextern pw_11585x2 + +cextern pw_5283_13377 +cextern pw_9929_13377 +cextern pw_15212_m13377 +cextern pw_15212_9929 +cextern pw_m5283_m15212 +cextern pw_13377x2 +cextern pw_m13377_13377 +cextern pw_13377_0 -pw_5283_13377: times 4 dw 5283, 13377 -pw_9929_13377: times 4 dw 9929, 13377 -pw_15212_m13377: times 4 dw 15212, -13377 -pw_15212_9929: times 4 dw 15212, 9929 -pw_m5283_m15212: times 4 dw -5283, -15212 -pw_13377x2: times 8 dw 13377*2 -pw_m13377_13377: times 4 dw -13377, 13377 -pw_13377_0: times 4 dw 13377, 0 pw_9929_m5283: times 4 dw 9929, -5283 -pw_3196_16069: times 4 dw 3196, 16069 -pw_m16069_3196: times 4 dw -16069, 3196 -pw_13623_9102: times 4 dw 13623, 9102 -pw_m9102_13623: times 4 dw -9102, 13623 +%macro COEF_PAIR 2-3 +cextern pw_m%1_%2 +cextern pw_%2_%1 +%if %0 == 3 +cextern pw_m%1_m%2 +%if %1 != %2 +cextern pw_m%2_%1 +cextern pw_%1_%2 +%endif +%endif +%endmacro -pw_1606_16305: times 4 dw 1606, 16305 -pw_m16305_1606: times 4 dw -16305, 1606 -pw_12665_10394: times 4 dw 12665, 10394 -pw_m10394_12665: times 4 dw -10394, 12665 -pw_7723_14449: times 4 dw 7723, 14449 -pw_m14449_7723: times 4 dw -14449, 7723 -pw_15679_4756: times 4 dw 15679, 4756 -pw_m4756_15679: times 4 dw -4756, 15679 -pw_15137_6270: times 4 dw 15137, 6270 -pw_m6270_15137: times 4 dw -6270, 15137 - -pw_804_16364: times 4 dw 804, 16364 -pw_m16364_804: times 4 dw -16364, 804 -pw_12140_11003: times 4 dw 12140, 11003 -pw_m11003_12140: times 4 dw -11003, 12140 -pw_7005_14811: times 4 dw 7005, 14811 -pw_m14811_7005: times 4 dw -14811, 7005 -pw_15426_5520: times 4 dw 15426, 5520 -pw_m5520_15426: times 4 dw -5520, 15426 -pw_16069_3196: times 4 dw 16069, 3196 -pw_m3196_16069: times 4 dw -3196, 16069 -pw_3981_15893: times 4 dw 3981, 15893 -pw_m15893_3981: times 4 dw -15893, 3981 -pw_14053_8423: times 4 dw 14053, 8423 -pw_m8423_14053: times 4 dw -8423, 14053 -pw_9760_13160: times 4 dw 9760, 13160 -pw_m13160_9760: times 4 dw -13160, 9760 -pw_16207_2404: times 4 dw 16207, 2404 -pw_m2404_16207: times 4 dw -2404, 16207 -pw_9102_13623: times 4 dw 9102, 13623 -pw_m13623_9102: times 4 dw -13623, 9102 -pw_m11585_m11585: times 8 dw -11585 - -pw_m3196_m16069: times 4 dw -3196, -16069 -pw_m13623_m9102: times 4 dw -13623, -9102 -pw_m6270_m15137: times 4 dw -6270, -15137 +COEF_PAIR 2404, 16207 +COEF_PAIR 3196, 16069, 1 +COEF_PAIR 4756, 15679 +COEF_PAIR 5520, 15426 +COEF_PAIR 6270, 15137, 1 +COEF_PAIR 8423, 14053 +COEF_PAIR 10394, 12665 +COEF_PAIR 11003, 12140 +COEF_PAIR 11585, 11585, 1 +COEF_PAIR 13160, 9760 +COEF_PAIR 13623, 9102, 1 +COEF_PAIR 14449, 7723 +COEF_PAIR 14811, 7005 +COEF_PAIR 15893, 3981 +COEF_PAIR 16305, 1606 +COEF_PAIR 16364, 804 default_8x8: times 12 db 1