mirror of https://git.ffmpeg.org/ffmpeg.git
PPC: simplify loading some values into altivec registers
Instead of filling a local array with the desired value and loading it, load a single element and vec_splat() it to fill the vector. Originally committed as revision 19691 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
1feec476aa
commit
b662e8395b
|
@ -23,6 +23,7 @@
|
||||||
#include "libavcodec/dsputil.h"
|
#include "libavcodec/dsputil.h"
|
||||||
#include "dsputil_ppc.h"
|
#include "dsputil_ppc.h"
|
||||||
#include "util_altivec.h"
|
#include "util_altivec.h"
|
||||||
|
#include "types_altivec.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8,
|
altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8,
|
||||||
|
@ -32,9 +33,7 @@
|
||||||
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
|
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
|
||||||
{
|
{
|
||||||
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
|
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
|
||||||
const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) =
|
const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder;
|
||||||
{rounder, rounder, rounder, rounder,
|
|
||||||
rounder, rounder, rounder, rounder};
|
|
||||||
const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
|
const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
|
||||||
{
|
{
|
||||||
(16-x16)*(16-y16), /* A */
|
(16-x16)*(16-y16), /* A */
|
||||||
|
@ -60,7 +59,7 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||||
Cv = vec_splat(tempA, 2);
|
Cv = vec_splat(tempA, 2);
|
||||||
Dv = vec_splat(tempA, 3);
|
Dv = vec_splat(tempA, 3);
|
||||||
|
|
||||||
rounderV = vec_ld(0, (unsigned short*)rounder_a);
|
rounderV = vec_splat((vec_u16)vec_lde(0, &rounder_a), 0);
|
||||||
|
|
||||||
// we'll be able to pick up our 9 char elements
|
// we'll be able to pick up our 9 char elements
|
||||||
// at src from those 32 bytes
|
// at src from those 32 bytes
|
||||||
|
|
|
@ -28,6 +28,8 @@
|
||||||
|
|
||||||
#include "dsputil_ppc.h"
|
#include "dsputil_ppc.h"
|
||||||
#include "util_altivec.h"
|
#include "util_altivec.h"
|
||||||
|
#include "types_altivec.h"
|
||||||
|
|
||||||
// Swaps two variables (used for altivec registers)
|
// Swaps two variables (used for altivec registers)
|
||||||
#define SWAP(a,b) \
|
#define SWAP(a,b) \
|
||||||
do { \
|
do { \
|
||||||
|
@ -504,29 +506,16 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
|
||||||
|
|
||||||
{
|
{
|
||||||
register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
|
register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
|
||||||
DECLARE_ALIGNED_16(short, qmul8[]) =
|
DECLARE_ALIGNED_16(short, qmul8) = qmul;
|
||||||
{
|
DECLARE_ALIGNED_16(short, qadd8) = qadd;
|
||||||
qmul, qmul, qmul, qmul,
|
|
||||||
qmul, qmul, qmul, qmul
|
|
||||||
};
|
|
||||||
DECLARE_ALIGNED_16(short, qadd8[]) =
|
|
||||||
{
|
|
||||||
qadd, qadd, qadd, qadd,
|
|
||||||
qadd, qadd, qadd, qadd
|
|
||||||
};
|
|
||||||
DECLARE_ALIGNED_16(short, nqadd8[]) =
|
|
||||||
{
|
|
||||||
-qadd, -qadd, -qadd, -qadd,
|
|
||||||
-qadd, -qadd, -qadd, -qadd
|
|
||||||
};
|
|
||||||
register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
|
register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
|
||||||
register vector bool short blockv_null, blockv_neg;
|
register vector bool short blockv_null, blockv_neg;
|
||||||
register short backup_0 = block[0];
|
register short backup_0 = block[0];
|
||||||
register int j = 0;
|
register int j = 0;
|
||||||
|
|
||||||
qmulv = vec_ld(0, qmul8);
|
qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0);
|
||||||
qaddv = vec_ld(0, qadd8);
|
qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0);
|
||||||
nqaddv = vec_ld(0, nqadd8);
|
nqaddv = vec_sub(vczero, qaddv);
|
||||||
|
|
||||||
#if 0 // block *is* 16 bytes-aligned, it seems.
|
#if 0 // block *is* 16 bytes-aligned, it seems.
|
||||||
// first make sure block[j] is 16 bytes-aligned
|
// first make sure block[j] is 16 bytes-aligned
|
||||||
|
|
Loading…
Reference in New Issue