diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index 46c33b83f4..d3976f0e86 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -1310,29 +1310,28 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); - int sum; - register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0); - register vector signed short temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + int sum; + register const_vector unsigned char vzero = + (const_vector unsigned char)vec_splat_u8(0); + register vector signed short temp0, temp1, temp2, temp3, temp4, + temp5, temp6, temp7; POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); { - register const_vector signed short vprod1 = (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); - register const_vector signed short vprod2 = (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); - register const_vector signed short vprod3 = (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); + register const_vector signed short vprod1 =(const_vector signed short) + AVV( 1,-1, 1,-1, 1,-1, 1,-1); + register const_vector signed short vprod2 =(const_vector signed short) + AVV( 1, 1,-1,-1, 1, 1,-1,-1); + register const_vector signed short vprod3 =(const_vector signed short) + AVV( 1, 1, 1, 1,-1,-1,-1,-1); register const_vector unsigned char perm1 = (const_vector unsigned char) - AVV(0x02, 0x03, 0x00, 0x01, - 0x06, 0x07, 0x04, 0x05, - 0x0A, 0x0B, 0x08, 0x09, - 0x0E, 0x0F, 0x0C, 0x0D); + AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, + 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); register const_vector unsigned char perm2 = (const_vector unsigned char) - AVV(0x04, 0x05, 0x06, 0x07, - 0x00, 0x01, 0x02, 0x03, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x08, 0x09, 0x0A, 0x0B); + AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, + 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); register const_vector unsigned char perm3 = (const_vector unsigned char) - AVV(0x08, 0x09, 0x0A, 0x0B, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x00, 0x01, 0x02, 0x03, - 0x04, 0x05, 0x06, 0x07); + AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); #define ONEITERBUTTERFLY(i, res) \ { \ @@ -1443,45 +1442,46 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1); */ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { - int sum; - register vector signed short - temp0 REG_v(v0), - temp1 REG_v(v1), - temp2 REG_v(v2), - temp3 REG_v(v3), - temp4 REG_v(v4), - temp5 REG_v(v5), - temp6 REG_v(v6), - temp7 REG_v(v7); - register vector signed short - temp0S REG_v(v8), - temp1S REG_v(v9), - temp2S REG_v(v10), - temp3S REG_v(v11), - temp4S REG_v(v12), - temp5S REG_v(v13), - temp6S REG_v(v14), - temp7S REG_v(v15); - register const_vector unsigned char vzero REG_v(v31)= (const_vector unsigned char)vec_splat_u8(0); + int sum; + register vector signed short + temp0 REG_v(v0), + temp1 REG_v(v1), + temp2 REG_v(v2), + temp3 REG_v(v3), + temp4 REG_v(v4), + temp5 REG_v(v5), + temp6 REG_v(v6), + temp7 REG_v(v7); + register vector signed short + temp0S REG_v(v8), + temp1S REG_v(v9), + temp2S REG_v(v10), + temp3S REG_v(v11), + temp4S REG_v(v12), + temp5S REG_v(v13), + temp6S REG_v(v14), + temp7S REG_v(v15); + register const_vector unsigned char vzero REG_v(v31)= + (const_vector unsigned char)vec_splat_u8(0); { - register const_vector signed short vprod1 REG_v(v16)= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); - register const_vector signed short vprod2 REG_v(v17)= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); - register const_vector signed short vprod3 REG_v(v18)= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); - register const_vector unsigned char perm1 REG_v(v19)= (const_vector unsigned char) - AVV(0x02, 0x03, 0x00, 0x01, - 0x06, 0x07, 0x04, 0x05, - 0x0A, 0x0B, 0x08, 0x09, - 0x0E, 0x0F, 0x0C, 0x0D); - register const_vector unsigned char perm2 REG_v(v20)= (const_vector unsigned char) - AVV(0x04, 0x05, 0x06, 0x07, - 0x00, 0x01, 0x02, 0x03, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x08, 0x09, 0x0A, 0x0B); - register const_vector unsigned char perm3 REG_v(v21)= (const_vector unsigned char) - AVV(0x08, 0x09, 0x0A, 0x0B, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x00, 0x01, 0x02, 0x03, - 0x04, 0x05, 0x06, 0x07); + register const_vector signed short vprod1 REG_v(v16)= + (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); + register const_vector signed short vprod2 REG_v(v17)= + (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); + register const_vector signed short vprod3 REG_v(v18)= + (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); + register const_vector unsigned char perm1 REG_v(v19)= + (const_vector unsigned char) + AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, + 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); + register const_vector unsigned char perm2 REG_v(v20)= + (const_vector unsigned char) + AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, + 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); + register const_vector unsigned char perm3 REG_v(v21)= + (const_vector unsigned char) + AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); #define ONEITERBUTTERFLY(i, res1, res2) \ { \ @@ -1642,27 +1642,27 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1); - int score; + int score; POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); - score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); - if (h==16) { - dst += 8*stride; - src += 8*stride; - score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); - } + score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); + if (h==16) { + dst += 8*stride; + src += 8*stride; + score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); + } POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); - return score; + return score; } int has_altivec(void) { #ifdef __AMIGAOS4__ - ULONG result = 0; - extern struct ExecIFace *IExec; + ULONG result = 0; + extern struct ExecIFace *IExec; - IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); - if (result == VECTORTYPE_ALTIVEC) return 1; - return 0; + IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); + if (result == VECTORTYPE_ALTIVEC) return 1; + return 0; #else /* __AMIGAOS4__ */ #ifdef CONFIG_DARWIN @@ -1757,75 +1757,66 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); } POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ - register int i; - register vector unsigned char - pixelsv1, pixelsv2, - pixelsavg; - register vector unsigned char - blockv, temp1, temp2, blocktemp; - register vector unsigned short - pixelssum1, pixelssum2, temp3; - register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); - register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); + register int i; + register vector unsigned char pixelsv1, pixelsv2, pixelsavg; + register vector unsigned char blockv, temp1, temp2, blocktemp; + register vector unsigned short pixelssum1, pixelssum2, temp3; - temp1 = vec_ld(0, pixels); - temp2 = vec_ld(16, pixels); - pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); - if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) - { - pixelsv2 = temp2; - } - else - { - pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); - } - pixelsv1 = vec_mergeh(vczero, pixelsv1); - pixelsv2 = vec_mergeh(vczero, pixelsv2); - pixelssum1 = vec_add((vector unsigned short)pixelsv1, - (vector unsigned short)pixelsv2); - pixelssum1 = vec_add(pixelssum1, vctwo); + register const_vector unsigned char vczero = (const_vector unsigned char) + vec_splat_u8(0); + register const_vector unsigned short vctwo = (const_vector unsigned short) + vec_splat_u16(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) { + pixelsv2 = temp2; + } else { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vctwo); POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); - for (i = 0; i < h ; i++) { - int rightside = ((unsigned long)block & 0x0000000F); - blockv = vec_ld(0, block); + for (i = 0; i < h ; i++) { + int rightside = ((unsigned long)block & 0x0000000F); + blockv = vec_ld(0, block); - temp1 = vec_ld(line_size, pixels); - temp2 = vec_ld(line_size + 16, pixels); - pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); - if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) - { - pixelsv2 = temp2; - } - else - { - pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); - } + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } else { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } - pixelsv1 = vec_mergeh(vczero, pixelsv1); - pixelsv2 = vec_mergeh(vczero, pixelsv2); - pixelssum2 = vec_add((vector unsigned short)pixelsv1, - (vector unsigned short)pixelsv2); - temp3 = vec_add(pixelssum1, pixelssum2); - temp3 = vec_sra(temp3, vctwo); - pixelssum1 = vec_add(pixelssum2, vctwo); - pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + pixelssum1 = vec_add(pixelssum2, vctwo); + pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); - if (rightside) - { - blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); - } - else - { - blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); - } + if (rightside) { + blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); + } else { + blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); + } - blockv = vec_avg(blocktemp, blockv); - vec_st(blockv, 0, block); + blockv = vec_avg(blocktemp, blockv); + vec_st(blockv, 0, block); - block += line_size; - pixels += line_size; - } + block += line_size; + pixels += line_size; + } POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */