mirror of https://git.ffmpeg.org/ffmpeg.git
Use updated motion compensation routines.
Originally committed as revision 713 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
3530320dbf
commit
dde3f77dbc
|
@ -105,132 +105,137 @@ void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Average 8 unsigned bytes in parallel, rounding down: (a + b) >> 1.
   For every byte, a + b == 2 * (a & b) + (a ^ b), so the truncated mean
   is (a & b) + ((a ^ b) >> 1) and no intermediate value can exceed 255.
   Bit 0 of each byte of (a ^ b) is masked off before the 64-bit shift so
   that it cannot leak into bit 7 of the byte below it.
   BYTE_VEC(x) is taken to replicate x into all eight byte lanes.  */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}
|
||||
|
||||
/* Average 8 unsigned bytes in parallel with rounding: (a + b + 1) >> 1.
   For every byte, a + b == 2 * (a | b) - (a ^ b), so the rounded-up mean
   is (a | b) - ((a ^ b) >> 1).  The subtraction never borrows from a
   neighbouring byte because (a ^ b) >> 1 cannot exceed (a | b) within a
   lane.  Bit 0 of each byte of (a ^ b) is masked off before the 64-bit
   shift so that it cannot leak into bit 7 of the byte below it.
   BYTE_VEC(x) is taken to replicate x into all eight byte lanes.  */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}
|
||||
|
||||
/* Average four groups of 8 unsigned bytes in parallel with rounding:
   (l1 + l2 + l3 + l4 + 2) >> 2 for every byte lane.
   Each byte is split into its upper six bits and lower two bits so that
   neither partial sum can overflow its lane.
   BYTE_VEC(x) is taken to replicate x into all eight byte lanes.  */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    /* Upper six bits, already divided by four: every term is at most 63,
       so the four-term sum is at most 252.  */
    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
    /* Lower two bits plus the rounding constant: at most 14 per lane.
       The final mask drops the bits the shift pulled in from the lane
       above, leaving a carry of 0..3.  */
    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
                    + (l2 & BYTE_VEC(0x03))
                    + (l3 & BYTE_VEC(0x03))
                    + (l4 & BYTE_VEC(0x03))
                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);

    return r1 + r2;
}
|
||||
|
||||
/* Average four groups of 8 unsigned bytes in parallel with the smaller
   rounding constant: (l1 + l2 + l3 + l4 + 1) >> 2 for every byte lane.
   Each byte is split into its upper six bits and lower two bits so that
   neither partial sum can overflow its lane.
   BYTE_VEC(x) is taken to replicate x into all eight byte lanes.  */
static inline uint64_t avg4_no_rnd(uint64_t l1, uint64_t l2,
                                   uint64_t l3, uint64_t l4)
{
    /* Upper six bits, already divided by four: every term is at most 63,
       so the four-term sum is at most 252.  */
    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
    /* Lower two bits plus the rounding constant: at most 13 per lane.
       The final mask drops the bits the shift pulled in from the lane
       above, leaving a carry of 0..3.  */
    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
                    + (l2 & BYTE_VEC(0x03))
                    + (l3 & BYTE_VEC(0x03))
                    + (l4 & BYTE_VEC(0x03))
                    + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03);

    return r1 + r2;
}
|
||||
|
||||
#define PIXOPNAME(suffix) put ## suffix
|
||||
#define BTYPE UINT8
|
||||
/* Loop body for the plain (no interpolation) variant.  For every row,
   the 8 bytes at pixels are fetched with LOAD and handed to STORE
   together with block; pixels then advances by line_size and block by
   INCR.  The names pixels, block, line_size and h come from the
   enclosing scope.  h must be at least 1 on entry because the count is
   only tested after a row has been processed; it is 0 afterwards.  */
#define OP(LOAD, STORE, INCR)                                       \
    do {                                                            \
        STORE(LOAD(pixels), block);                                 \
        pixels += line_size;                                        \
        block  += INCR;                                             \
    } while (--h)
|
||||
|
||||
/* Loop body for the horizontal half-pel variant.  For every row, the
   8 bytes at pixels are combined via AVG2 with the same row shifted by
   one byte (bytes 1..8), and the result is passed to STORE with block.
   The shifted word is built from the loaded word plus pixels[8], so nine
   source bytes are read per row and LOAD is assumed to place pixels[0]
   in the least significant byte.  The names pixels, block, line_size and
   h come from the enclosing scope; h must be at least 1 on entry.  */
#define OP_X2(LOAD, STORE, INCR)                                            \
    do {                                                                    \
        uint64_t pix1 = LOAD(pixels);                                       \
        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);           \
                                                                            \
        STORE(AVG2(pix1, pix2), block);                                     \
        pixels += line_size;                                                \
        block  += INCR;                                                     \
    } while (--h)
|
||||
|
||||
/* Loop body for the vertical half-pel variant.  Each output row is
   AVG2 of the current source row and the row line_size bytes below it;
   the lower row is kept in pix so every source row is loaded only once.
   h + 1 source rows are read in total.  The names pixels, block,
   line_size and h come from the enclosing scope; h must be at least 1
   on entry.  */
#define OP_Y2(LOAD, STORE, INCR)                                            \
    do {                                                                    \
        uint64_t pix = LOAD(pixels);                                        \
                                                                            \
        do {                                                                \
            uint64_t next_pix;                                              \
                                                                            \
            pixels  += line_size;                                           \
            next_pix = LOAD(pixels);                                        \
            STORE(AVG2(pix, next_pix), block);                              \
            block   += INCR;                                                \
            pix      = next_pix;                                            \
        } while (--h);                                                      \
    } while (0)
|
||||
|
||||
/* Loop body for the diagonal half-pel variant.  Each output row is AVG4
   of four words: the current source row, the same row shifted by one
   byte, and the corresponding two words from the row line_size bytes
   below.  The lower pair is carried over to the next pass so every
   source row is loaded only once.  Nine bytes are read from each of
   h + 1 source rows, and LOAD is assumed to place pixels[0] in the least
   significant byte.  The names pixels, block, line_size and h come from
   the enclosing scope; h must be at least 1 on entry.  */
#define OP_XY2(LOAD, STORE, INCR)                                           \
    do {                                                                    \
        uint64_t pix1 = LOAD(pixels);                                       \
        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);           \
                                                                            \
        do {                                                                \
            uint64_t next_pix1, next_pix2;                                  \
                                                                            \
            pixels   += line_size;                                          \
            next_pix1 = LOAD(pixels);                                       \
            next_pix2 = next_pix1 >> 8 | ((uint64_t) pixels[8] << 56);      \
                                                                            \
            STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);           \
                                                                            \
            block += INCR;                                                  \
            pix1   = next_pix1;                                             \
            pix2   = next_pix2;                                             \
        } while (--h);                                                      \
    } while (0)
|
||||
|
||||
/* Define one static function named OPNAME_pixelsSUFF_axp taking
   (BTYPE *block, const uint8_t *pixels, int line_size, int h).
   Its body is the loop macro OPKIND, instantiated with uldq as the load
   primitive when pixels is not 8-byte aligned and with ldq otherwise
   (presumably the unaligned and aligned quadword loads; confirm against
   their definitions).  STORE and INCR are forwarded to OPKIND as is.  */
#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR)                   \
static void OPNAME ## _pixels ## SUFF ## _axp(BTYPE *block,                 \
                                              const uint8_t *pixels,        \
                                              int line_size, int h)         \
{                                                                           \
    if ((size_t) pixels & 0x7) {                                            \
        OPKIND(uldq, STORE, INCR);                                          \
    } else {                                                                \
        OPKIND(ldq, STORE, INCR);                                           \
    }                                                                       \
}
|
||||
|
||||
/* Instantiate the four functions of one operation family through
   MAKE_OP: OPNAME_pixels_axp, OPNAME_pixels_x2_axp, OPNAME_pixels_y2_axp
   and OPNAME_pixels_xy2_axp, built from OP, OP_X2, OP_Y2 and OP_XY2
   respectively.  No ';' is emitted after the generated definitions; the
   invocation site supplies the only trailing one.  */
#define PIXOP(BTYPE, OPNAME, STORE, INCR)                   \
    MAKE_OP(BTYPE, OPNAME,     , OP,     STORE, INCR)       \
    MAKE_OP(BTYPE, OPNAME, _x2,  OP_X2,  STORE, INCR)       \
    MAKE_OP(BTYPE, OPNAME, _y2,  OP_Y2,  STORE, INCR)       \
    MAKE_OP(BTYPE, OPNAME, _xy2, OP_XY2, STORE, INCR)
|
||||
|
||||
/* Rounding primitives. */
|
||||
#define AVG2 avg2
|
||||
#define AVG4 avg4
|
||||
#define STORE(l, b) stq(l, b)
|
||||
#include "pixops.h"
|
||||
#undef PIXOPNAME
|
||||
#undef BTYPE
|
||||
PIXOP(uint8_t, put, STORE, line_size);
|
||||
|
||||
#undef STORE
|
||||
/* Combine l via AVG2 with the 8 bytes already at b and write the result
   back to b.  No trailing ';' so the macro expands to a single expression
   and stays safe in if/else bodies.  */
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
|
||||
PIXOP(uint8_t, avg, STORE, line_size);
|
||||
|
||||
/* Non-rounding primitives. */
|
||||
#undef AVG2
|
||||
#undef AVG4
|
||||
#undef STORE
|
||||
|
||||
#define PIXOPNAME(suffix) put_no_rnd ## suffix
|
||||
#define BTYPE UINT8
|
||||
#define AVG2 avg2_no_rnd
|
||||
#define AVG4 avg4_no_rnd
|
||||
#define STORE(l, b) stq(l, b)
|
||||
#include "pixops.h"
|
||||
#undef PIXOPNAME
|
||||
#undef BTYPE
|
||||
#undef AVG2
|
||||
#undef AVG4
|
||||
PIXOP(uint8_t, put_no_rnd, STORE, line_size);
|
||||
|
||||
#undef STORE
|
||||
|
||||
/* The following functions are untested. */
|
||||
#if 0
|
||||
|
||||
#define PIXOPNAME(suffix) avg ## suffix
|
||||
#define BTYPE UINT8
|
||||
#define AVG2 avg2
|
||||
#define AVG4 avg4
|
||||
/* Combine l via AVG2 with the 8 bytes already at b and write the result
   back to b.  No trailing ';' so the macro expands to a single expression
   and stays safe in if/else bodies.  */
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
|
||||
#include "pixops.h"
|
||||
#undef PIXOPNAME
|
||||
#undef BTYPE
|
||||
#undef AVG2
|
||||
#undef AVG4
|
||||
#undef STORE
|
||||
|
||||
#define PIXOPNAME(suffix) avg_no_rnd ## suffix
|
||||
#define BTYPE UINT8
|
||||
#define AVG2 avg2_no_rnd
|
||||
#define AVG4 avg4_no_rnd
|
||||
/* Combine l via AVG2 with the 8 bytes already at b and write the result
   back to b.  No trailing ';' so the macro expands to a single expression
   and stays safe in if/else bodies.  */
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
|
||||
#include "pixops.h"
|
||||
#undef PIXOPNAME
|
||||
#undef BTYPE
|
||||
#undef AVG2
|
||||
#undef AVG4
|
||||
#undef STORE
|
||||
|
||||
#define PIXOPNAME(suffix) sub ## suffix
|
||||
#define BTYPE DCTELEM
|
||||
#define AVG2 avg2
|
||||
#define AVG4 avg4
|
||||
/* Subtract the eight packed bytes of l, least significant byte first,
   from block[0] .. block[7].  l is evaluated exactly once; block is
   evaluated once per element.  */
#define STORE(l, block) do {                                                \
        UINT64 store_px_ = (l);                                             \
        int store_i_;                                                       \
        for (store_i_ = 0; store_i_ < 8; store_i_++)                        \
            (block)[store_i_] -= (store_px_ >> (8 * store_i_)) & 0xff;      \
    } while (0)
|
||||
#include "pixops.h"
|
||||
#undef PIXOPNAME
|
||||
#undef BTYPE
|
||||
#undef AVG2
|
||||
#undef AVG4
|
||||
#undef STORE
|
||||
|
||||
#endif
|
||||
PIXOP(uint8_t, avg_no_rnd, STORE, line_size);
|
||||
|
||||
void dsputil_init_alpha(void)
|
||||
{
|
||||
|
@ -244,6 +249,16 @@ void dsputil_init_alpha(void)
|
|||
put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp;
|
||||
put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp;
|
||||
|
||||
avg_pixels_tab[0] = avg_pixels_axp;
|
||||
avg_pixels_tab[1] = avg_pixels_x2_axp;
|
||||
avg_pixels_tab[2] = avg_pixels_y2_axp;
|
||||
avg_pixels_tab[3] = avg_pixels_xy2_axp;
|
||||
|
||||
avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_axp;
|
||||
avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_axp;
|
||||
avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_axp;
|
||||
avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_axp;
|
||||
|
||||
/* amask clears all bits that correspond to present features. */
|
||||
if (amask(AMASK_MVI) == 0) {
|
||||
put_pixels_clamped = put_pixels_clamped_mvi_asm;
|
||||
|
|
|
@ -1,135 +0,0 @@
|
|||
/*
|
||||
* Alpha optimized DSP utils
|
||||
* Copyright (c) 2002 Falk Hueffner <falk@debian.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
/* This file is intended to be #included with proper definitions of
|
||||
* PIXOPNAME, BTYPE, AVG2, AVG4 and STORE. */
|
||||
|
||||
/* Plain (no interpolation) template.  For each of the h rows, the 8
   bytes at pixels are loaded and passed to STORE together with block;
   pixels then advances by line_size bytes and block by line_size
   elements.  uldq is used when pixels is not 8-byte aligned, ldq
   otherwise.  h must be at least 1: the count is tested only after a
   row has been processed.  */
static void PIXOPNAME(_pixels_axp)(BTYPE *block, const UINT8 *pixels,
                                   int line_size, int h)
{
    if ((size_t) pixels & 0x7) {
        do {
            STORE(uldq(pixels), block);
            pixels += line_size;
            block  += line_size;
        } while (--h);
    } else {
        do {
            STORE(ldq(pixels), block);
            pixels += line_size;
            block  += line_size;
        } while (--h);
    }
}
|
||||
|
||||
/* Horizontal half-pel template.  For each of the h rows, the 8 bytes at
   pixels are combined via AVG2 with the same row shifted by one byte
   (built from the loaded word and pixels[8], so nine bytes are read per
   row), and the result goes to STORE together with block.  uldq is used
   when pixels is not 8-byte aligned, ldq otherwise.  h must be at least
   1: the count is tested only after a row has been processed.  */
static void PIXOPNAME(_pixels_x2_axp)(BTYPE *block, const UINT8 *pixels,
                                      int line_size, int h)
{
    if ((size_t) pixels & 0x7) {
        do {
            UINT64 pix1, pix2;

            pix1 = uldq(pixels);
            pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
            STORE(AVG2(pix1, pix2), block);
            pixels += line_size;
            block  += line_size;
        } while (--h);
    } else {
        do {
            UINT64 pix1, pix2;

            pix1 = ldq(pixels);
            pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
            STORE(AVG2(pix1, pix2), block);
            pixels += line_size;
            block  += line_size;
        } while (--h);
    }
}
|
||||
|
||||
/* Vertical half-pel template.  Each of the h output rows is AVG2 of a
   source row and the row line_size bytes below it; the lower row is
   carried over in pix so every source row is loaded once, h + 1 rows in
   total.  uldq is used when pixels is not 8-byte aligned, ldq
   otherwise.  h must be at least 1: the count is tested only after a
   row has been processed.  */
static void PIXOPNAME(_pixels_y2_axp)(BTYPE *block, const UINT8 *pixels,
                                      int line_size, int h)
{
    if ((size_t) pixels & 0x7) {
        UINT64 pix = uldq(pixels);

        do {
            UINT64 next_pix;

            pixels  += line_size;
            next_pix = uldq(pixels);
            STORE(AVG2(pix, next_pix), block);
            block   += line_size;
            pix      = next_pix;
        } while (--h);
    } else {
        UINT64 pix = ldq(pixels);

        do {
            UINT64 next_pix;

            pixels  += line_size;
            next_pix = ldq(pixels);
            STORE(AVG2(pix, next_pix), block);
            block   += line_size;
            pix      = next_pix;
        } while (--h);
    }
}
|
||||
|
||||
/* This could be further sped up by recycling AVG4 intermediate
|
||||
results from the previous loop pass. */
|
||||
static void PIXOPNAME(_pixels_xy2_axp)(BTYPE *block, const UINT8 *pixels,
|
||||
int line_size, int h)
|
||||
{
|
||||
if ((size_t) pixels & 0x7) {
|
||||
UINT64 pix1 = uldq(pixels);
|
||||
UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
|
||||
|
||||
do {
|
||||
UINT64 next_pix1, next_pix2;
|
||||
|
||||
pixels += line_size;
|
||||
next_pix1 = uldq(pixels);
|
||||
next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56);
|
||||
|
||||
STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);
|
||||
|
||||
block += line_size;
|
||||
pix1 = next_pix1;
|
||||
pix2 = next_pix2;
|
||||
} while (--h);
|
||||
} else {
|
||||
UINT64 pix1 = ldq(pixels);
|
||||
UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
|
||||
|
||||
do {
|
||||
UINT64 next_pix1, next_pix2;
|
||||
|
||||
pixels += line_size;
|
||||
next_pix1 = ldq(pixels);
|
||||
next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56);
|
||||
|
||||
STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);
|
||||
|
||||
block += line_size;
|
||||
pix1 = next_pix1;
|
||||
pix2 = next_pix2;
|
||||
} while (--h);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue