diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 942eef780c..db11d529fb 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -105,132 +105,172 @@ void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
 }
 #endif
 
-/* Average 8 unsigned bytes in parallel: (b1 + b2) >> 1
-   Since the immediate result could be greater than 255, we do the
-   shift first. The result is too low by one if the bytes were both
-   odd, so we need to add (l1 & l2) & BYTE_VEC(0x01).  */
-static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2)
+/* Average 8 unsigned bytes in parallel, rounding down: (a + b) >> 1.
+   Since a + b == 2 * (a & b) + (a ^ b), the result is (a & b) plus
+   half of (a ^ b).  Bit 0 of each byte of (a ^ b) is cleared before the
+   shift so that it cannot spill into the neighbouring byte.  */
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
 {
-    UINT64 correction = (l1 & l2) & BYTE_VEC(0x01);
-    l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
-    l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
-    return l1 + l2 + correction;
+    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
 }
 
-/* Average 8 bytes with rounding: (b1 + b2 + 1) >> 1
-   The '1' only has an effect when one byte is even and the other odd,
-   i. e. we also need to add (l1 ^ l2) & BYTE_VEC(0x01).
-   Incidentally, that is equivalent to (l1 | l2) & BYTE_VEC(0x01).  */
-static inline UINT64 avg2(UINT64 l1, UINT64 l2)
+/* Average 8 unsigned bytes in parallel, rounding up: (a + b + 1) >> 1.
+   Since a + b == 2 * (a | b) - (a ^ b), the result is (a | b) minus
+   half of (a ^ b), with the same per-byte masking as in
+   avg2_no_rnd().  */
+static inline uint64_t avg2(uint64_t a, uint64_t b)
 {
-    UINT64 correction = (l1 | l2) & BYTE_VEC(0x01);
-    l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
-    l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
-    return l1 + l2 + correction;
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
 }
 
-static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
+/* Average four sets of 8 unsigned bytes in parallel with rounding:
+   (l1 + l2 + l3 + l4 + 2) >> 2.  The upper six and the lower two bits
+   of every byte are summed separately so that no partial sum can
+   overflow into the neighbouring byte.  */
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
 {
-    UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l2 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l3 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l4 & ~BYTE_VEC(0x03)) >> 2);
-    UINT64 r2 = ((  (l1 & BYTE_VEC(0x03))
-                  + (l2 & BYTE_VEC(0x03))
-                  + (l3 & BYTE_VEC(0x03))
-                  + (l4 & BYTE_VEC(0x03))
-                  + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                    + (l2 & BYTE_VEC(0x03))
+                    + (l3 & BYTE_VEC(0x03))
+                    + (l4 & BYTE_VEC(0x03))
+                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
     return r1 + r2;
 }
 
-static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
+/* Like avg4(), but with a rounding constant of 1 instead of 2:
+   (l1 + l2 + l3 + l4 + 1) >> 2.  */
+static inline uint64_t avg4_no_rnd(uint64_t l1, uint64_t l2,
+                                   uint64_t l3, uint64_t l4)
 {
-    UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l2 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l3 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l4 & ~BYTE_VEC(0x03)) >> 2);
-    UINT64 r2 = ((  (l1 & BYTE_VEC(0x03))
-                  + (l2 & BYTE_VEC(0x03))
-                  + (l3 & BYTE_VEC(0x03))
-                  + (l4 & BYTE_VEC(0x03))
-                  + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03);
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                    + (l2 & BYTE_VEC(0x03))
+                    + (l3 & BYTE_VEC(0x03))
+                    + (l4 & BYTE_VEC(0x03))
+                    + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03);
     return r1 + r2;
 }
 
-#define PIXOPNAME(suffix) put ## suffix
-#define BTYPE UINT8
+/* Loop bodies for the pixel operations.  They are expanded inside a
+   function that provides block, pixels, line_size and h, and handle
+   one 8-byte row per pass.  LOAD reads 8 source bytes, STORE commits
+   the 64-bit result to block, INCR is the step between rows of block.
+   The row counter h is tested after each pass, so it must be at least 1.
+
+   OP: pass each row straight to STORE.  */
+#define OP(LOAD, STORE, INCR)                   \
+    do {                                        \
+        STORE(LOAD(pixels), block);             \
+        pixels += line_size;                    \
+        block += INCR;                          \
+    } while (--h)
+
+/* OP_X2: average each row with itself shifted down by 8 bits, where
+   pixels[8] supplies the byte that is inserted at the top.  */
+#define OP_X2(LOAD, STORE, INCR)                                \
+    do {                                                        \
+        uint64_t pix1, pix2;                                    \
+                                                                \
+        pix1 = LOAD(pixels);                                    \
+        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);        \
+        STORE(AVG2(pix1, pix2), block);                         \
+        pixels += line_size;                                    \
+        block += INCR;                                          \
+    } while (--h)
+
+/* OP_Y2: average each row with the row line_size bytes further on;
+   h + 1 source rows are read in total.  */
+#define OP_Y2(LOAD, STORE, INCR)                \
+    do {                                        \
+        uint64_t pix = LOAD(pixels);            \
+        do {                                    \
+            uint64_t next_pix;                  \
+                                                \
+            pixels += line_size;                \
+            next_pix = LOAD(pixels);            \
+            STORE(AVG2(pix, next_pix), block);  \
+            block += INCR;                      \
+            pix = next_pix;                     \
+        } while (--h);                          \
+    } while (0)
+
+/* OP_XY2: combine the OP_X2 and OP_Y2 neighbourhoods with AVG4; h + 1
+   source rows of 9 bytes each are read.  This could be further sped up
+   by recycling AVG4 intermediate results from the previous pass.  */
+#define OP_XY2(LOAD, STORE, INCR)                                           \
+    do {                                                                    \
+        uint64_t pix1 = LOAD(pixels);                                       \
+        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);           \
+                                                                            \
+        do {                                                                \
+            uint64_t next_pix1, next_pix2;                                  \
+                                                                            \
+            pixels += line_size;                                            \
+            next_pix1 = LOAD(pixels);                                       \
+            next_pix2 = next_pix1 >> 8 | ((uint64_t) pixels[8] << 56);      \
+                                                                            \
+            STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);           \
+                                                                            \
+            block += INCR;                                                  \
+            pix1 = next_pix1;                                               \
+            pix2 = next_pix2;                                               \
+        } while (--h);                                                      \
+    } while (0)
+
+/* Define OPNAME_pixelsSUFF_axp(), which runs the loop body OPKIND with
+   uldq as LOAD when pixels is not 8-byte aligned and with ldq
+   otherwise.  */
+#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR)               \
+static void OPNAME ## _pixels ## SUFF ## _axp(BTYPE *block,             \
+                                              const uint8_t *pixels,    \
+                                              int line_size, int h)     \
+{                                                                       \
+    if ((size_t) pixels & 0x7) {                                        \
+        OPKIND(uldq, STORE, INCR);                                      \
+    } else {                                                            \
+        OPKIND(ldq, STORE, INCR);                                       \
+    }                                                                   \
+}
+
+/* Define the plain, x2, y2 and xy2 variants of one pixel operation.  */
+#define PIXOP(BTYPE, OPNAME, STORE, INCR)               \
+    MAKE_OP(BTYPE, OPNAME, , OP, STORE, INCR);          \
+    MAKE_OP(BTYPE, OPNAME, _x2, OP_X2, STORE, INCR);    \
+    MAKE_OP(BTYPE, OPNAME, _y2, OP_Y2, STORE, INCR);    \
+    MAKE_OP(BTYPE, OPNAME, _xy2, OP_XY2, STORE, INCR);
+
+/* Rounding primitives.  */
 #define AVG2 avg2
 #define AVG4 avg4
 #define STORE(l, b) stq(l, b)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
+PIXOP(uint8_t, put, STORE, line_size);
+
+/* The avg variants blend the new value with the 8 bytes already in
+   block, using the AVG2 that is current where PIXOP is expanded.  */
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
+PIXOP(uint8_t, avg, STORE, line_size);
+
+/* Non-rounding primitives.  */
 #undef AVG2
 #undef AVG4
 #undef STORE
-
-#define PIXOPNAME(suffix) put_no_rnd ## suffix
-#define BTYPE UINT8
 #define AVG2 avg2_no_rnd
 #define AVG4 avg4_no_rnd
 #define STORE(l, b) stq(l, b)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
+PIXOP(uint8_t, put_no_rnd, STORE, line_size);
+
 #undef STORE
-
-/* The following functions are untested.  */
-#if 0
-
-#define PIXOPNAME(suffix) avg ## suffix
-#define BTYPE UINT8
-#define AVG2 avg2
-#define AVG4 avg4
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
-
-#define PIXOPNAME(suffix) avg_no_rnd ## suffix
-#define BTYPE UINT8
-#define AVG2 avg2_no_rnd
-#define AVG4 avg4_no_rnd
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
-
-#define PIXOPNAME(suffix) sub ## suffix
-#define BTYPE DCTELEM
-#define AVG2 avg2
-#define AVG4 avg4
-#define STORE(l, block) do {                \
-    UINT64 xxx = l;                         \
-    (block)[0] -= (xxx >>  0) & 0xff;       \
-    (block)[1] -= (xxx >>  8) & 0xff;       \
-    (block)[2] -= (xxx >> 16) & 0xff;       \
-    (block)[3] -= (xxx >> 24) & 0xff;       \
-    (block)[4] -= (xxx >> 32) & 0xff;       \
-    (block)[5] -= (xxx >> 40) & 0xff;       \
-    (block)[6] -= (xxx >> 48) & 0xff;       \
-    (block)[7] -= (xxx >> 56) & 0xff;       \
-} while (0)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
-
-#endif
+/* AVG2 is avg2_no_rnd here, so the blend with block rounds down too.  */
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
+PIXOP(uint8_t, avg_no_rnd, STORE, line_size);
 
 void dsputil_init_alpha(void)
 {
@@ -244,6 +284,16 @@ void dsputil_init_alpha(void)
     put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp;
     put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp;
 
+    avg_pixels_tab[0] = avg_pixels_axp;
+    avg_pixels_tab[1] = avg_pixels_x2_axp;
+    avg_pixels_tab[2] = avg_pixels_y2_axp;
+    avg_pixels_tab[3] = avg_pixels_xy2_axp;
+
+    avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_axp;
+    avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_axp;
+    avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_axp;
+    avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_axp;
+
     /* amask clears all bits that correspond to present features.  */
     if (amask(AMASK_MVI) == 0) {
         put_pixels_clamped = put_pixels_clamped_mvi_asm;
diff --git a/libavcodec/alpha/pixops.h b/libavcodec/alpha/pixops.h
deleted file mode 100644
index 118d7ae23f..0000000000
--- a/libavcodec/alpha/pixops.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Alpha optimized DSP utils
- * Copyright (c) 2002 Falk Hueffner
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-/* This file is intended to be #included with proper definitions of
- * PIXOPNAME, BTYPE, AVG2, AVG4 and STORE.  */
-
-static void PIXOPNAME(_pixels_axp)(BTYPE *block, const UINT8 *pixels,
-                                   int line_size, int h)
-{
-    if ((size_t) pixels & 0x7) {
-        do {
-            STORE(uldq(pixels), block);
-            pixels += line_size;
-            block  += line_size;
-        } while (--h);
-    } else {
-        do {
-            STORE(ldq(pixels), block);
-            pixels += line_size;
-            block  += line_size;
-        } while (--h);
-    }
-}
-
-static void PIXOPNAME(_pixels_x2_axp)(BTYPE *block, const UINT8 *pixels,
-                                      int line_size, int h)
-{
-    if ((size_t) pixels & 0x7) {
-        do {
-            UINT64 pix1, pix2;
-
-            pix1 = uldq(pixels);
-            pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
-            STORE(AVG2(pix1, pix2), block);
-            pixels += line_size;
-            block  += line_size;
-        } while (--h);
-    } else {
-        do {
-            UINT64 pix1, pix2;
-
-            pix1 = ldq(pixels);
-            pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
-            STORE(AVG2(pix1, pix2), block);
-            pixels += line_size;
-            block  += line_size;
-        } while (--h);
-    }
-}
-
-static void PIXOPNAME(_pixels_y2_axp)(BTYPE *block, const UINT8 *pixels,
-                                      int line_size, int h)
-{
-    if ((size_t) pixels & 0x7) {
-        UINT64 pix = uldq(pixels);
-        do {
-            UINT64 next_pix;
-
-            pixels += line_size;
-            next_pix = uldq(pixels);
-            STORE(AVG2(pix, next_pix), block);
-            block += line_size;
-            pix = next_pix;
-        } while (--h);
-    } else {
-        UINT64 pix = ldq(pixels);
-        do {
-            UINT64 next_pix;
-
-            pixels += line_size;
-            next_pix = ldq(pixels);
-            STORE(AVG2(pix, next_pix), block);
-            block += line_size;
-            pix = next_pix;
-        } while (--h);
-    }
-}
-
-/* This could be further sped up by recycling AVG4 intermediate
-   results from the previous loop pass.  */
-static void PIXOPNAME(_pixels_xy2_axp)(BTYPE *block, const UINT8 *pixels,
-                                       int line_size, int h)
-{
-    if ((size_t) pixels & 0x7) {
-        UINT64 pix1 = uldq(pixels);
-        UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
-
-        do {
-            UINT64 next_pix1, next_pix2;
-
-            pixels += line_size;
-            next_pix1 = uldq(pixels);
-            next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56);
-
-            STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);
-
-            block += line_size;
-            pix1 = next_pix1;
-            pix2 = next_pix2;
-        } while (--h);
-    } else {
-        UINT64 pix1 = ldq(pixels);
-        UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
-
-        do {
-            UINT64 next_pix1, next_pix2;
-
-            pixels += line_size;
-            next_pix1 = ldq(pixels);
-            next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56);
-
-            STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);
-
-            block += line_size;
-            pix1 = next_pix1;
-            pix2 = next_pix2;
-        } while (--h);
-    }
-}