dxva2: gpu_memcpy: fix build for GCC 4.8.3

This commit is contained in:
James Ross-Gowan 2014-10-26 15:17:43 +11:00 committed by wm4
parent 3b34f0078d
commit 0166f2c7a0
1 changed file with 10 additions and 3 deletions

View File

@ -19,14 +19,18 @@
* Taken from the QuickSync decoder by Eric Gur * Taken from the QuickSync decoder by Eric Gur
*/ */
#include <emmintrin.h> #ifndef GPU_MEMCPY_SSE4_H_
#define GPU_MEMCPY_SSE4_H_
#pragma GCC push_options
#pragma GCC target("sse4.1")
#include <smmintrin.h>
// gpu_memcpy is a memcpy-style function that copies data very fast from // gpu_memcpy is a memcpy-style function that copies data very fast from
// GPU tiled memory (write back). // GPU tiled memory (write back).
// Performance tip: the page offsets (12 LSBs) of the two addresses should differ; // Performance tip: the page offsets (12 LSBs) of the two addresses should differ;
// optimally, use a 2K offset between them. // optimally, use a 2K offset between them.
__attribute__((target("sse4"))) static inline void static inline void *gpu_memcpy(void *restrict d, const void *restrict s, size_t size)
*gpu_memcpy(void *restrict d, const void *restrict s, size_t size)
{ {
static const size_t regsInLoop = sizeof(size_t) * 2; // 8 or 16 static const size_t regsInLoop = sizeof(size_t) * 2; // 8 or 16
@ -127,3 +131,6 @@ __attribute__((target("sse4"))) static inline void
return d; return d;
} }
#pragma GCC pop_options
#endif