mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-25 08:42:39 +00:00
prefetch pixels for future motion compensation. 2-5% faster h264.
Originally committed as revision 5203 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
7e815047e5
commit
513fbd8e5a
@ -3773,6 +3773,8 @@ static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
|
|||||||
dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
|
dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * No-op stand-in for the DSPContext.prefetch hook.
 *
 * dsputil_init() installs this as the default c->prefetch, so the hook can
 * always be called without a NULL check. The parameter list mirrors the
 * prefetch function-pointer type exactly; a definition with an empty
 * parameter list would be called with three arguments through that pointer,
 * which is undefined behaviour.
 *
 * @param mem    ignored
 * @param stride ignored
 * @param h      ignored
 */
static void just_return(void *mem, int stride, int h)
{
    /* Intentionally empty: only silence unused-parameter diagnostics. */
    (void)mem;
    (void)stride;
    (void)h;
}
|
||||||
|
|
||||||
/* init static data */
|
/* init static data */
|
||||||
void dsputil_static_init(void)
|
void dsputil_static_init(void)
|
||||||
{
|
{
|
||||||
@ -4054,6 +4056,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->inner_add_yblock = ff_snow_inner_add_yblock;
|
c->inner_add_yblock = ff_snow_inner_add_yblock;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
c->prefetch= just_return;
|
||||||
|
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
dsputil_init_mmx(c, avctx);
|
dsputil_init_mmx(c, avctx);
|
||||||
#endif
|
#endif
|
||||||
|
@ -343,6 +343,8 @@ typedef struct DSPContext {
|
|||||||
void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
|
void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
|
||||||
void (*horizontal_compose97i)(DWTELEM *b, int width);
|
void (*horizontal_compose97i)(DWTELEM *b, int width);
|
||||||
void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
|
void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
|
||||||
|
|
||||||
|
void (*prefetch)(void *mem, int stride, int h);
|
||||||
} DSPContext;
|
} DSPContext;
|
||||||
|
|
||||||
void dsputil_static_init(void);
|
void dsputil_static_init(void);
|
||||||
|
@ -2752,6 +2752,22 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
|
|||||||
x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
|
x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Hint the cache about reference pixels that upcoming motion compensation
 * is expected to read.
 *
 * The motion vector and reference index cached at scan8[0] of the current
 * macroblock serve as the estimate. The fetch position is pushed 64 bytes
 * to the right of the current macroblock (4 macroblocks ahead in luma),
 * tuned for 64-byte cache lines. Nothing is done when the reference index
 * for this list is negative.
 *
 * @param h    decoder context
 * @param list reference list to read the motion vector and reference from
 */
static inline void prefetch_motion(H264Context *h, int list){
    MpegEncContext * const s = &h->s;
    const int ref_idx = h->ref_cache[list][scan8[0]];
    uint8_t **planes;
    int px, py, luma_off, chroma_off;

    /* no usable reference in this list: nothing to prefetch */
    if(ref_idx < 0)
        return;

    /* full-pel position the cached vector points at (vector >> 2),
     * relative to the current 16x16 macroblock */
    px = (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
    py = (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
    planes = h->ref_list[list][ref_idx].data;

    /* plane 0: 4 rows per call; the starting row cycles with mb_x&3 so
     * successive macroblocks touch different groups of 4 rows */
    luma_off = px + (py + (s->mb_x&3)*4)*s->linesize + 64;
    s->dsp.prefetch(planes[0]+luma_off, s->linesize, 4);

    /* planes 1 and 2: the "stride" is the distance between the two plane
     * pointers, so the 2 iterations hit the same offset in each plane */
    chroma_off = (px>>1) + ((py>>1) + (s->mb_x&7))*s->uvlinesize + 64;
    s->dsp.prefetch(planes[1]+chroma_off, planes[2]-planes[1], 2);
}
|
||||||
|
|
||||||
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
|
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
|
||||||
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
|
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
|
||||||
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
|
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
|
||||||
@ -2762,6 +2778,8 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
|
|||||||
|
|
||||||
assert(IS_INTER(mb_type));
|
assert(IS_INTER(mb_type));
|
||||||
|
|
||||||
|
prefetch_motion(h, 0);
|
||||||
|
|
||||||
if(IS_16X16(mb_type)){
|
if(IS_16X16(mb_type)){
|
||||||
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
|
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
|
||||||
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
|
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
|
||||||
@ -2833,6 +2851,8 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
prefetch_motion(h, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void decode_init_vlc(H264Context *h){
|
static void decode_init_vlc(H264Context *h){
|
||||||
|
@ -2489,6 +2489,18 @@ static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Generate a cache-prefetch helper matching the DSPContext.prefetch hook.
 *
 * The generated function issues the prefetch instruction `op` on the first
 * byte of h consecutive rows, starting at mem and advancing by stride bytes
 * per row. It only reads the address for the hint; it never writes memory.
 *
 * The loop is do/while, so h must be >= 1 (one row is always touched).
 *
 * The helpers are file-local (static): they are only installed through the
 * function pointer in dsputil_init_mmx(). __asm__ is used instead of asm so
 * the code is also accepted in strict ISO C compiler modes.
 *
 * @param name identifier of the function to define
 * @param op   prefetch instruction mnemonic taking one memory operand
 */
#define PREFETCH(name, op) \
static void name(void *mem, int stride, int h){\
    const uint8_t *p= mem;\
    do{\
        __asm__ volatile(#op" %0" :: "m"(*p));\
        p+= stride;\
    }while(--h);\
}
PREFETCH(prefetch_mmx2, prefetcht0)
PREFETCH(prefetch_3dnow, prefetch)
#undef PREFETCH
|
||||||
|
|
||||||
#include "h264dsp_mmx.c"
|
#include "h264dsp_mmx.c"
|
||||||
|
|
||||||
/* external functions, from idct_mmx.c */
|
/* external functions, from idct_mmx.c */
|
||||||
@ -2749,6 +2761,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->h264_idct8_add= ff_h264_idct8_add_mmx;
|
c->h264_idct8_add= ff_h264_idct8_add_mmx;
|
||||||
|
|
||||||
if (mm_flags & MM_MMXEXT) {
|
if (mm_flags & MM_MMXEXT) {
|
||||||
|
c->prefetch = prefetch_mmx2;
|
||||||
|
|
||||||
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
|
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
|
||||||
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
|
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
|
||||||
|
|
||||||
@ -2879,6 +2893,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
|
c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
|
||||||
#endif //CONFIG_ENCODERS
|
#endif //CONFIG_ENCODERS
|
||||||
} else if (mm_flags & MM_3DNOW) {
|
} else if (mm_flags & MM_3DNOW) {
|
||||||
|
c->prefetch = prefetch_3dnow;
|
||||||
|
|
||||||
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
|
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
|
||||||
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
|
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user