mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-25 16:52:31 +00:00
PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
Originally committed as revision 2008 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
cc74aafbc6
commit
3efd4952df
15
configure
vendored
15
configure
vendored
@ -380,18 +380,25 @@ if test $tune != "generic"; then
|
||||
TUNECPU=ppc604
|
||||
;;
|
||||
G3|75*|ppc75*|PowerPC75*)
|
||||
CFLAGS="$CFLAGS -mcpu=750"
|
||||
CFLAGS="$CFLAGS -mcpu=750 -mtune=750"
|
||||
if test $altivec = "yes"; then
|
||||
echo "WARNING: tuning for PPC75x but altivec enabled !";
|
||||
fi
|
||||
TUNECPU=ppc750
|
||||
;;
|
||||
G4|74*|ppc74*|PowerPC74*)
|
||||
CFLAGS="$CFLAGS -mcpu=7400"
|
||||
G4|745*|ppc745*|PowerPC745*)
|
||||
CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450"
|
||||
if test $altivec = "no"; then
|
||||
echo "WARNING: tuning for PPC745x but altivec disabled !";
|
||||
fi
|
||||
TUNECPU=ppc7450
|
||||
;;
|
||||
74*|ppc74*|PowerPC74*)
|
||||
CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400"
|
||||
if test $altivec = "no"; then
|
||||
echo "WARNING: tuning for PPC74xx but altivec disabled !";
|
||||
fi
|
||||
TUNECPU=ppc7400
|
||||
TUNECPU=ppc7450
|
||||
;;
|
||||
G5|970|ppc970|PowerPC970|power4*|Power4*)
|
||||
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc64 -force_cpusubtype_ALL "
|
||||
|
@ -1086,7 +1086,9 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
pixelssum3, pixelssum4, temp4;
|
||||
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
|
||||
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
||||
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
|
||||
temp1 = vec_ld(0, pixels);
|
||||
temp2 = vec_ld(16, pixels);
|
||||
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
|
||||
@ -1109,7 +1111,6 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
(vector unsigned short)pixelsv2);
|
||||
pixelssum1 = vec_add(pixelssum1, vctwo);
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
for (i = 0; i < h ; i++) {
|
||||
blockv = vec_ld(0, block);
|
||||
|
||||
@ -1207,7 +1208,9 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
|
||||
register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
|
||||
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
||||
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
|
||||
temp1 = vec_ld(0, pixels);
|
||||
temp2 = vec_ld(16, pixels);
|
||||
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
|
||||
@ -1230,7 +1233,6 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
(vector unsigned short)pixelsv2);
|
||||
pixelssum1 = vec_add(pixelssum1, vcone);
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
for (i = 0; i < h ; i++) {
|
||||
blockv = vec_ld(0, block);
|
||||
|
||||
|
@ -61,7 +61,8 @@ static unsigned char* perfname[] = {
|
||||
"clear_blocks_dcbz128_ppc"
|
||||
};
|
||||
#ifdef POWERPC_PERF_USE_PMC
|
||||
unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
|
||||
unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total];
|
||||
unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total];
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
@ -86,14 +87,22 @@ void powerpc_display_perf_report(void)
|
||||
(double)perfdata[i][powerpc_data_num],
|
||||
perfdata[i][powerpc_data_num]);
|
||||
#ifdef POWERPC_PERF_USE_PMC
|
||||
if (perfdata_miss[i][powerpc_data_num] != (unsigned long long)0)
|
||||
if (perfdata_pmc2[i][powerpc_data_num] != (unsigned long long)0)
|
||||
fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
||||
perfname[i],
|
||||
perfdata_miss[i][powerpc_data_min],
|
||||
perfdata_miss[i][powerpc_data_max],
|
||||
(double)perfdata_miss[i][powerpc_data_sum] /
|
||||
(double)perfdata_miss[i][powerpc_data_num],
|
||||
perfdata_miss[i][powerpc_data_num]);
|
||||
perfdata_pmc2[i][powerpc_data_min],
|
||||
perfdata_pmc2[i][powerpc_data_max],
|
||||
(double)perfdata_pmc2[i][powerpc_data_sum] /
|
||||
(double)perfdata_pmc2[i][powerpc_data_num],
|
||||
perfdata_pmc2[i][powerpc_data_num]);
|
||||
if (perfdata_pmc3[i][powerpc_data_num] != (unsigned long long)0)
|
||||
fprintf(stderr, " Function \"%s\" (pmc3):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
||||
perfname[i],
|
||||
perfdata_pmc3[i][powerpc_data_min],
|
||||
perfdata_pmc3[i][powerpc_data_max],
|
||||
(double)perfdata_pmc3[i][powerpc_data_sum] /
|
||||
(double)perfdata_pmc3[i][powerpc_data_num],
|
||||
perfdata_pmc3[i][powerpc_data_num]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -139,7 +148,7 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
||||
i += 16;
|
||||
}
|
||||
for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) {
|
||||
asm volatile("dcbz %0,%1" : : "r" (i), "r" (blocks) : "memory");
|
||||
asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
|
||||
}
|
||||
if (misal) {
|
||||
((unsigned long*)blocks)[188] = 0L;
|
||||
@ -172,7 +181,7 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
||||
}
|
||||
else
|
||||
for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
|
||||
asm volatile("dcbzl %0,%1" : : "r" (i), "r" (blocks) : "memory");
|
||||
asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
|
||||
}
|
||||
#else
|
||||
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
||||
@ -209,7 +218,9 @@ long check_dcbzl_effect(void)
|
||||
|
||||
memset(fakedata, 0xFF, 1024);
|
||||
|
||||
asm volatile("dcbzl %0, %1" : : "r" (fakedata_middle), "r" (zero));
|
||||
/* below the constraint "b" seems to mean "Address base register"
|
||||
in gcc-3.3 / RS/6000 speak; it seems to avoid using r0, so.... */
|
||||
asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
|
||||
|
||||
for (i = 0; i < 1024 ; i ++)
|
||||
{
|
||||
@ -300,10 +311,14 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
|
||||
perfdata[i][powerpc_data_sum] = 0x0000000000000000;
|
||||
perfdata[i][powerpc_data_num] = 0x0000000000000000;
|
||||
#ifdef POWERPC_PERF_USE_PMC
|
||||
perfdata_miss[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
|
||||
perfdata_miss[i][powerpc_data_max] = 0x0000000000000000;
|
||||
perfdata_miss[i][powerpc_data_sum] = 0x0000000000000000;
|
||||
perfdata_miss[i][powerpc_data_num] = 0x0000000000000000;
|
||||
perfdata_pmc2[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
|
||||
perfdata_pmc2[i][powerpc_data_max] = 0x0000000000000000;
|
||||
perfdata_pmc2[i][powerpc_data_sum] = 0x0000000000000000;
|
||||
perfdata_pmc2[i][powerpc_data_num] = 0x0000000000000000;
|
||||
perfdata_pmc3[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
|
||||
perfdata_pmc3[i][powerpc_data_max] = 0x0000000000000000;
|
||||
perfdata_pmc3[i][powerpc_data_sum] = 0x0000000000000000;
|
||||
perfdata_pmc3[i][powerpc_data_num] = 0x0000000000000000;
|
||||
#endif /* POWERPC_PERF_USE_PMC */
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +19,17 @@
|
||||
#ifndef _DSPUTIL_PPC_
|
||||
#define _DSPUTIL_PPC_
|
||||
|
||||
#ifdef CONFIG_DARWIN
|
||||
/* The Apple assembler shipped w/ gcc-3.3 knows about DCBZL, previous assemblers don't
|
||||
We assume here that the Darwin GCC is from Apple.... */
|
||||
#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
|
||||
#define NO_DCBZL
|
||||
#endif
|
||||
#else /* CONFIG_DARWIN */
|
||||
/* I don't think any non-Apple assembler knows about DCBZL */
|
||||
#define NO_DCBZL
|
||||
#endif /* CONFIG_DARWIN */
|
||||
|
||||
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
|
||||
void powerpc_display_perf_report(void);
|
||||
/* if you add to the enum below, also add to the perfname array
|
||||
@ -49,7 +60,8 @@ enum powerpc_data_index {
|
||||
};
|
||||
extern unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
|
||||
#ifdef POWERPC_PERF_USE_PMC
|
||||
extern unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
|
||||
extern unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total];
|
||||
extern unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total];
|
||||
#endif
|
||||
|
||||
#ifndef POWERPC_PERF_USE_PMC
|
||||
@ -75,12 +87,17 @@ extern unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
|
||||
|
||||
#else /* POWERPC_PERF_USE_PMC */
|
||||
#define POWERPC_GET_CYCLES(a) asm volatile("mfspr %0, 937" : "=r" (a))
|
||||
#define POWERPC_GET_MISS(a) asm volatile("mfspr %0, 938" : "=r" (a))
|
||||
#define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, miss_start, miss_stop
|
||||
#define POWERPC_TBL_START_COUNT(a, cond) do { POWERPC_GET_MISS(miss_start); POWERPC_GET_CYCLES(cycles_start); } while (0)
|
||||
#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
|
||||
#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
|
||||
#define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, pmc2_start, pmc2_stop, pmc3_start, pmc3_stop
|
||||
#define POWERPC_TBL_START_COUNT(a, cond) do { \
|
||||
POWERPC_GET_PMC3(pmc3_start); \
|
||||
POWERPC_GET_PMC2(pmc2_start); \
|
||||
POWERPC_GET_CYCLES(cycles_start); } while (0)
|
||||
#define POWERPC_TBL_STOP_COUNT(a, cond) do { \
|
||||
POWERPC_GET_CYCLES(cycles_stop); \
|
||||
POWERPC_GET_MISS(miss_stop); \
|
||||
POWERPC_GET_PMC2(pmc2_stop); \
|
||||
POWERPC_GET_PMC3(pmc3_stop); \
|
||||
if (cycles_stop >= cycles_start) \
|
||||
{ \
|
||||
unsigned long diff = \
|
||||
@ -95,18 +112,32 @@ extern unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
|
||||
perfdata[a][powerpc_data_num] ++; \
|
||||
} \
|
||||
} \
|
||||
if (miss_stop >= miss_start) \
|
||||
if (pmc2_stop >= pmc2_start) \
|
||||
{ \
|
||||
unsigned long diff = \
|
||||
miss_stop - miss_start; \
|
||||
pmc2_stop - pmc2_start; \
|
||||
if (cond) \
|
||||
{ \
|
||||
if (diff < perfdata_miss[a][powerpc_data_min]) \
|
||||
perfdata_miss[a][powerpc_data_min] = diff; \
|
||||
if (diff > perfdata_miss[a][powerpc_data_max]) \
|
||||
perfdata_miss[a][powerpc_data_max] = diff; \
|
||||
perfdata_miss[a][powerpc_data_sum] += diff; \
|
||||
perfdata_miss[a][powerpc_data_num] ++; \
|
||||
if (diff < perfdata_pmc2[a][powerpc_data_min]) \
|
||||
perfdata_pmc2[a][powerpc_data_min] = diff; \
|
||||
if (diff > perfdata_pmc2[a][powerpc_data_max]) \
|
||||
perfdata_pmc2[a][powerpc_data_max] = diff; \
|
||||
perfdata_pmc2[a][powerpc_data_sum] += diff; \
|
||||
perfdata_pmc2[a][powerpc_data_num] ++; \
|
||||
} \
|
||||
} \
|
||||
if (pmc3_stop >= pmc3_start) \
|
||||
{ \
|
||||
unsigned long diff = \
|
||||
pmc3_stop - pmc3_start; \
|
||||
if (cond) \
|
||||
{ \
|
||||
if (diff < perfdata_pmc3[a][powerpc_data_min]) \
|
||||
perfdata_pmc3[a][powerpc_data_min] = diff; \
|
||||
if (diff > perfdata_pmc3[a][powerpc_data_max]) \
|
||||
perfdata_pmc3[a][powerpc_data_max] = diff; \
|
||||
perfdata_pmc3[a][powerpc_data_sum] += diff; \
|
||||
perfdata_pmc3[a][powerpc_data_num] ++; \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
@ -13,15 +13,8 @@
|
||||
|
||||
#ifdef CONFIG_DARWIN
|
||||
#define AVV(x...) (x)
|
||||
/* The Apple assembler shipped w/ gcc-3.3 knows about DCBZL, previous assemblers don't
|
||||
We assume here that the Darwin GCC is from Apple.... */
|
||||
#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
|
||||
#define NO_DCBZL
|
||||
#endif
|
||||
#else
|
||||
#define AVV(x...) {x}
|
||||
/* I don't think any non-Apple assembler knows about DCBZL */
|
||||
#define NO_DCBZL
|
||||
#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
|
||||
|
||||
/* This code was provided to me by Bartosch Pixa
|
||||
|
@ -28,9 +28,10 @@
|
||||
altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8,
|
||||
to preserve proper dst alignment.
|
||||
*/
|
||||
#define GMC1_PERF_COND (h==8)
|
||||
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_gmc1_num, h == 8);
|
||||
POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
const int A=(16-x16)*(16-y16);
|
||||
const int B=( x16)*(16-y16);
|
||||
@ -38,7 +39,7 @@ POWERPC_TBL_DECLARE(altivec_gmc1_num, h == 8);
|
||||
const int D=( x16)*( y16);
|
||||
int i;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
|
||||
POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
|
||||
for(i=0; i<h; i++)
|
||||
{
|
||||
@ -54,7 +55,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
|
||||
src+= stride;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
|
||||
POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
const unsigned short __attribute__ ((aligned(16))) rounder_a[8] =
|
||||
@ -77,7 +78,7 @@ POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
|
||||
unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
|
||||
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
|
||||
POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
|
||||
tempA = vec_ld(0, (unsigned short*)ABCD);
|
||||
Av = vec_splat(tempA, 0);
|
||||
@ -165,7 +166,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
|
||||
src += stride;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
|
||||
POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user