Fix ff_emu_edge_core_sse() on Win64.

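Fix emu_edge_v_extend_15 so that it is smaller than 128 bytes on Win64 (the
extend variants are laid out with ALIGN 128). This is done by being stricter
about register operand sizes (e.g. `dec r3d` rather than `dec r3` for the loop
counters) and about which register is used where several are available (a
separate `valw4` alias for the `bottom` case). This fixes segfaults in
emulated_edge() function calls on Win64.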
This commit is contained in:
Ronald S. Bultje 2011-02-08 15:56:32 -05:00
parent 3940caad02
commit 17cf7c68ed

View File

@ -468,6 +468,11 @@ cglobal emu_edge_core_%1, 2, 7, 0
%define valw ax %define valw ax
%define valw2 r10w %define valw2 r10w
%define valw3 r3w %define valw3 r3w
%ifdef WIN64
%define valw4 r4w
%else ; unix64
%define valw4 r3w
%endif
%define vald eax %define vald eax
%else %else
%define vall bl %define vall bl
@ -475,6 +480,7 @@ cglobal emu_edge_core_%1, 2, 7, 0
%define valw bx %define valw bx
%define valw2 r6w %define valw2 r6w
%define valw3 valw2 %define valw3 valw2
%define valw4 valw3
%define vald ebx %define vald ebx
%define stack_offset 0x14 %define stack_offset 0x14
%endif %endif
@ -537,8 +543,10 @@ cglobal emu_edge_core_%1, 2, 7, 0
%elif (%2-%%src_off) == 3 %elif (%2-%%src_off) == 3
%ifidn %1, top %ifidn %1, top
mov valw2, [r1+%%src_off] mov valw2, [r1+%%src_off]
%else ; %1 != top %elifidn %1, body
mov valw3, [r1+%%src_off] mov valw3, [r1+%%src_off]
%elifidn %1, bottom
mov valw4, [r1+%%src_off]
%endif ; %1 ==/!= top %endif ; %1 ==/!= top
mov vall, [r1+%%src_off+2] mov vall, [r1+%%src_off+2]
%endif ; (%2-%%src_off) == 1/2/3 %endif ; (%2-%%src_off) == 1/2/3
@ -584,8 +592,10 @@ cglobal emu_edge_core_%1, 2, 7, 0
%elif (%2-%%dst_off) == 3 %elif (%2-%%dst_off) == 3
%ifidn %1, top %ifidn %1, top
mov [r0+%%dst_off], valw2 mov [r0+%%dst_off], valw2
%else ; %1 != top %elifidn %1, body
mov [r0+%%dst_off], valw3 mov [r0+%%dst_off], valw3
%elifidn %1, bottom
mov [r0+%%dst_off], valw4
%endif ; %1 ==/!= top %endif ; %1 ==/!= top
mov [r0+%%dst_off+2], vall mov [r0+%%dst_off+2], vall
%endif ; (%2-%%dst_off) == 1/2/3 %endif ; (%2-%%dst_off) == 1/2/3
@ -615,7 +625,7 @@ ALIGN 128
WRITE_NUM_BYTES top, %%n, %1 ; write bytes WRITE_NUM_BYTES top, %%n, %1 ; write bytes
add r0 , r2 ; dst += linesize add r0 , r2 ; dst += linesize
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
dec r3 dec r3d
%else ; ARCH_X86_32 %else ; ARCH_X86_32
dec dword r3m dec dword r3m
%endif ; ARCH_X86_64/32 %endif ; ARCH_X86_64/32
@ -627,7 +637,7 @@ ALIGN 128
WRITE_NUM_BYTES body, %%n, %1 ; write bytes WRITE_NUM_BYTES body, %%n, %1 ; write bytes
add r0 , r2 ; dst += linesize add r0 , r2 ; dst += linesize
add r1 , r2 ; src += linesize add r1 , r2 ; src += linesize
dec r4 dec r4d
jnz .emuedge_copy_body_ %+ %%n %+ _loop ; } while (--end_y) jnz .emuedge_copy_body_ %+ %%n %+ _loop ; } while (--end_y)
; copy bottom pixels ; copy bottom pixels
@ -638,7 +648,7 @@ ALIGN 128
.emuedge_extend_bottom_ %+ %%n %+ _loop: ; do { .emuedge_extend_bottom_ %+ %%n %+ _loop: ; do {
WRITE_NUM_BYTES bottom, %%n, %1 ; write bytes WRITE_NUM_BYTES bottom, %%n, %1 ; write bytes
add r0 , r2 ; dst += linesize add r0 , r2 ; dst += linesize
dec r5 dec r5d
jnz .emuedge_extend_bottom_ %+ %%n %+ _loop ; } while (--block_h) jnz .emuedge_extend_bottom_ %+ %%n %+ _loop ; } while (--block_h)
.emuedge_v_extend_end_ %+ %%n: .emuedge_v_extend_end_ %+ %%n: