diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index d1a2943656..633cae68b0 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -49,7 +49,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h { int i; int s __attribute__((aligned(16))); - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); vector unsigned char *tv; vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; vector unsigned int sad; @@ -96,7 +96,7 @@ int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h { int i; int s __attribute__((aligned(16))); - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); vector unsigned char *tv; vector unsigned char pix1v, pix2v, pix3v, avgv, t5; vector unsigned int sad; @@ -157,8 +157,8 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int int i; int s __attribute__((aligned(16))); uint8_t *pix3 = pix2 + line_size; - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); - const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); + const_vector unsigned short two = (const_vector unsigned short)vec_splat_u16(2); vector unsigned char *tv, avgv, t5; vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; @@ -257,7 +257,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char 
perm1, perm2, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sad; @@ -299,7 +299,7 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sad; @@ -344,7 +344,7 @@ int pix_norm1_altivec(uint8_t *pix, int line_size) { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char *tv; vector unsigned char pixv; vector unsigned int sv; @@ -380,7 +380,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sum; @@ -436,7 +436,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0); vector unsigned char perm1, perm2, *pix1v, *pix2v; vector unsigned char t1, t2, t3,t4, t5; vector unsigned int sum; @@ -480,7 +480,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) int pix_sum_altivec(uint8_t * pix, int line_size) { - const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); + const_vector unsigned int zero = 
(const_vector unsigned int)vec_splat_u32(0); vector unsigned char perm, *pixv; vector unsigned char t1; vector unsigned int sad; @@ -515,7 +515,7 @@ void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line { int i; vector unsigned char perm, bytes, *pixv; - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); vector signed short shorts; for(i=0;i<8;i++) @@ -542,7 +542,7 @@ void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, { int i; vector unsigned char perm, bytes, *pixv; - const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); + const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0); vector signed short shorts1, shorts2; for(i=0;i<4;i++) @@ -654,10 +654,10 @@ POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1); POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); for(i=0; i<h; i++) { - *((uint32_t*)(block)) = (((const struct unaligned_32 *) (pixels))->l); - *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l); - *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l); - *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l); + *((uint32_t*)(block)) = LD32(pixels); + *((uint32_t*)(block+4)) = LD32(pixels+4); + *((uint32_t*)(block+8)) = LD32(pixels+8); + *((uint32_t*)(block+12)) = LD32(pixels+12); pixels+=line_size; block +=line_size; } @@ -729,10 +729,10 @@ POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1); POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); for(i=0; i<h; i++) { - op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); - op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l)); - op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l)); - op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l)); + op_avg(*((uint32_t*)(block)),LD32(pixels)); + op_avg(*((uint32_t*)(block+4)),LD32(pixels+4)); + op_avg(*((uint32_t*)(block+8)),LD32(pixels+8)); + 
op_avg(*((uint32_t*)(block+12)),LD32(pixels+12)); pixels+=line_size; block +=line_size; } @@ -878,8 +878,8 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); blockv, temp1, temp2; register vector unsigned short pixelssum1, pixelssum2, temp3; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); temp1 = vec_ld(0, pixels); temp2 = vec_ld(16, pixels); @@ -993,9 +993,9 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); blockv, temp1, temp2; register vector unsigned short pixelssum1, pixelssum2, temp3; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); - register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1); + register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); temp1 = vec_ld(0, pixels); temp2 = vec_ld(16, pixels); @@ -1109,8 +1109,8 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); register vector unsigned short pixelssum1, pixelssum2, temp3, pixelssum3, pixelssum4, temp4; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vctwo = (const_vector unsigned 
short)vec_splat_u16(2); POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); @@ -1230,9 +1230,9 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); register vector unsigned short pixelssum1, pixelssum2, temp3, pixelssum3, pixelssum4, temp4; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); - register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1); + register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index 9882e401f9..691b7725b1 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -69,13 +69,13 @@ static unsigned char* perfname[] = { void powerpc_display_perf_report(void) { int i, j; - fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); + av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); for(i = 0 ; i < powerpc_perf_total ; i++) { for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) { if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) - fprintf(stderr, + av_log(NULL, AV_LOG_INFO, " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", perfname[i], j+1, @@ -129,7 +129,11 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); i += 16; } for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) { +#ifndef __MWERKS__ asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); 
+#else + __dcbz( blocks, i ); +#endif } if (misal) { ((unsigned long*)blocks)[188] = 0L; diff --git a/libavcodec/ppc/gcc_fixes.h b/libavcodec/ppc/gcc_fixes.h index a8e92cb2f1..13d4ff12e7 100644 --- a/libavcodec/ppc/gcc_fixes.h +++ b/libavcodec/ppc/gcc_fixes.h @@ -12,7 +12,11 @@ #endif #ifdef CONFIG_DARWIN -#define AVV(x...) (x) +# ifndef __MWERKS__ +# define AVV(x...) (x) +# else +# define AVV +# endif #else #define AVV(x...) {x} #if (__GNUC__ * 100 + __GNUC_MINOR__ < 303) @@ -81,4 +85,10 @@ __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \ #endif /* CONFIG_DARWIN */ +#ifndef __MWERKS__ +#define const_vector const vector +#else +#define const_vector vector +#endif + #endif /* _GCC_FIXES_ */ diff --git a/libavcodec/ppc/gmc_altivec.c b/libavcodec/ppc/gmc_altivec.c index 671ee110aa..3448216859 100644 --- a/libavcodec/ppc/gmc_altivec.c +++ b/libavcodec/ppc/gmc_altivec.c @@ -69,8 +69,8 @@ POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); ( x16)*( y16), /* D */ 0, 0, 0, 0 /* padding */ }; - register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); - register const vector unsigned short vcsr8 = (const vector unsigned short)vec_splat_u16(8); + register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); + register const_vector unsigned short vcsr8 = (const_vector unsigned short)vec_splat_u16(8); register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD; register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD; int i; diff --git a/libavcodec/ppc/idct_altivec.c b/libavcodec/ppc/idct_altivec.c index d821ecd223..3445adaddf 100644 --- a/libavcodec/ppc/idct_altivec.c +++ b/libavcodec/ppc/idct_altivec.c @@ -44,6 +44,7 @@ #include "dsputil_altivec.h" #define vector_s16_t vector signed short +#define const_vector_s16_t const_vector signed short #define vector_u16_t vector unsigned short #define vector_s8_t vector signed char #define 
vector_u8_t vector unsigned char @@ -155,7 +156,7 @@ vx7 = vec_sra (vy7, shift); -static const vector_s16_t constants[5] = { +static const_vector_s16_t constants[5] = { (vector_s16_t) AVV(23170, 13573, 6518, 21895, -23170, -21895, 32, 31), (vector_s16_t) AVV(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725), (vector_s16_t) AVV(22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521), @@ -174,8 +175,9 @@ POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ vector_u8_t tmp; +#ifdef POWERPC_PERFORMANCE_REPORT POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); - +#endif IDCT #define COPY(dest,src) \ @@ -211,7 +213,9 @@ POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1); vector_u8_t perm1; vector_u8_t p0, p1, p; +#ifdef POWERPC_PERFORMANCE_REPORT POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1); +#endif IDCT diff --git a/libavcodec/ppc/mpegvideo_altivec.c b/libavcodec/ppc/mpegvideo_altivec.c index ae3170d91a..51b387792c 100644 --- a/libavcodec/ppc/mpegvideo_altivec.c +++ b/libavcodec/ppc/mpegvideo_altivec.c @@ -107,7 +107,9 @@ int dct_quantize_altivec(MpegEncContext* s, int lastNonZero; vector float row0, row1, row2, row3, row4, row5, row6, row7; vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; - const vector float zero = (const vector float)FOUROF(0.); + const_vector float zero = (const_vector float)FOUROF(0.); + // used after quantise step + int oldBaseValue = 0; // Load the data into the row/alt vectors { @@ -284,9 +286,6 @@ int dct_quantize_altivec(MpegEncContext* s, } } - // used after quantise step - int oldBaseValue = 0; - // perform the quantise step, using the floating point data // still in the row/alt registers { @@ -414,21 +413,23 @@ int dct_quantize_altivec(MpegEncContext* s, data7 = vec_max(vec_min(data7, max_q), min_q); } + { vector bool char zero_01, zero_23, zero_45, zero_67; vector signed char scanIndices_01, scanIndices_23, scanIndices_45, scanIndices_67; vector signed char negOne = 
vec_splat_s8(-1); vector signed char* scanPtr = (vector signed char*)(s->intra_scantable.inverse); + signed char lastNonZeroChar; // Determine the largest non-zero index. - zero_01 = vec_pack(vec_cmpeq(data0, (vector short)zero), - vec_cmpeq(data1, (vector short)zero)); - zero_23 = vec_pack(vec_cmpeq(data2, (vector short)zero), - vec_cmpeq(data3, (vector short)zero)); - zero_45 = vec_pack(vec_cmpeq(data4, (vector short)zero), - vec_cmpeq(data5, (vector short)zero)); - zero_67 = vec_pack(vec_cmpeq(data6, (vector short)zero), - vec_cmpeq(data7, (vector short)zero)); + zero_01 = vec_pack(vec_cmpeq(data0, (vector signed short)zero), + vec_cmpeq(data1, (vector signed short)zero)); + zero_23 = vec_pack(vec_cmpeq(data2, (vector signed short)zero), + vec_cmpeq(data3, (vector signed short)zero)); + zero_45 = vec_pack(vec_cmpeq(data4, (vector signed short)zero), + vec_cmpeq(data5, (vector signed short)zero)); + zero_67 = vec_pack(vec_cmpeq(data6, (vector signed short)zero), + vec_cmpeq(data7, (vector signed short)zero)); // 64 biggest values scanIndices_01 = vec_sel(scanPtr[0], negOne, zero_01); @@ -461,7 +462,6 @@ int dct_quantize_altivec(MpegEncContext* s, scanIndices_01 = vec_splat(scanIndices_01, 0); - signed char lastNonZeroChar; vec_ste(scanIndices_01, 0, &lastNonZeroChar); @@ -484,6 +484,7 @@ int dct_quantize_altivec(MpegEncContext* s, vec_st(data5, 80, data); vec_st(data6, 96, data); vec_st(data7, 112, data); + } } // special handling of block[0] @@ -562,7 +563,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); } #else /* ALTIVEC_USE_REFERENCE_C_CODE */ { - register const vector short vczero = (const vector short)vec_splat_s16(0); + register const_vector signed short vczero = (const_vector signed short)vec_splat_s16(0); short __attribute__ ((aligned(16))) qmul8[] = { qmul, qmul, qmul, qmul, @@ -578,7 +579,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); -qadd, -qadd, -qadd, -qadd, -qadd, -qadd, -qadd, -qadd }; - register vector short 
blockv, qmulv, qaddv, nqaddv, temp1; + register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; register vector bool short blockv_null, blockv_neg; register short backup_0 = block[0]; register int j = 0;