mirror of https://git.ffmpeg.org/ffmpeg.git
checkasm: Test more h264 idct variants
Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
parent
3cae7f8b9b
commit
516c479172
|
@ -22,6 +22,7 @@
|
||||||
#include "checkasm.h"
|
#include "checkasm.h"
|
||||||
#include "libavcodec/avcodec.h"
|
#include "libavcodec/avcodec.h"
|
||||||
#include "libavcodec/h264dsp.h"
|
#include "libavcodec/h264dsp.h"
|
||||||
|
#include "libavcodec/h264data.h"
|
||||||
#include "libavutil/common.h"
|
#include "libavutil/common.h"
|
||||||
#include "libavutil/internal.h"
|
#include "libavutil/internal.h"
|
||||||
#include "libavutil/intreadwrite.h"
|
#include "libavutil/intreadwrite.h"
|
||||||
|
@ -223,10 +224,97 @@ static void check_idct(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
report("idct");
|
}
|
||||||
|
|
||||||
|
static void check_idct_multiple(void)
|
||||||
|
{
|
||||||
|
LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]);
|
||||||
|
LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);
|
||||||
|
LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]);
|
||||||
|
LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]);
|
||||||
|
LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);
|
||||||
|
LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);
|
||||||
|
LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]);
|
||||||
|
H264DSPContext h;
|
||||||
|
int bit_depth, i, y, func;
|
||||||
|
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
|
||||||
|
|
||||||
|
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
|
||||||
|
ff_h264dsp_init(&h, bit_depth, 1);
|
||||||
|
for (func = 0; func < 3; func++) {
|
||||||
|
void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
|
||||||
|
const char *name;
|
||||||
|
int sz = 4, intra = 0;
|
||||||
|
int block_offset[16] = { 0 };
|
||||||
|
switch (func) {
|
||||||
|
case 0:
|
||||||
|
idct = h.h264_idct_add16;
|
||||||
|
name = "h264_idct_add16";
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
idct = h.h264_idct_add16intra;
|
||||||
|
name = "h264_idct_add16intra";
|
||||||
|
intra = 1;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
idct = h.h264_idct8_add4;
|
||||||
|
name = "h264_idct8_add4";
|
||||||
|
sz = 8;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
memset(nnzc, 0, 15 * 8);
|
||||||
|
memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);
|
||||||
|
for (i = 0; i < 16 * 16; i += sz * sz) {
|
||||||
|
uint8_t src[8 * 8 * 2];
|
||||||
|
uint8_t dst[8 * 8 * 2];
|
||||||
|
int16_t coef[8 * 8 * 2];
|
||||||
|
int index = i / sz;
|
||||||
|
int block_y = (index / 16) * sz;
|
||||||
|
int block_x = index % 16;
|
||||||
|
int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;
|
||||||
|
int nnz = rnd() % 3;
|
||||||
|
|
||||||
|
randomize_buffers();
|
||||||
|
if (sz == 4)
|
||||||
|
dct4x4(coef, bit_depth);
|
||||||
|
else
|
||||||
|
dct8x8(coef, bit_depth);
|
||||||
|
|
||||||
|
for (y = 0; y < sz; y++)
|
||||||
|
memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
|
||||||
|
&dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);
|
||||||
|
|
||||||
|
if (nnz > 1)
|
||||||
|
nnz = sz * sz;
|
||||||
|
memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
|
||||||
|
coef, nnz * SIZEOF_COEF);
|
||||||
|
|
||||||
|
if (intra && nnz == 1)
|
||||||
|
nnz = 0;
|
||||||
|
|
||||||
|
nnzc[scan8[i / 16]] = nnz;
|
||||||
|
block_offset[i / 16] = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
|
||||||
|
memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);
|
||||||
|
memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
|
||||||
|
memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
|
||||||
|
memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
|
||||||
|
call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
|
||||||
|
call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
|
||||||
|
if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||
|
||||||
|
memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))
|
||||||
|
fail();
|
||||||
|
bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Entry point for the H.264 DSP checks.
 *
 * Runs check_idct() followed by check_idct_multiple() and then issues a
 * single report("idct") covering both.
 */
void checkasm_check_h264dsp(void)
{
    check_idct();
    check_idct_multiple();

    report("idct");
}
|
||||||
|
|
Loading…
Reference in New Issue