mirror of https://git.ffmpeg.org/ffmpeg.git
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
we apply them as 16x8/8x16/8x8 subblocks where possible. Since this allows us to use width=8/16 instead of width=4 MC functions, we can now take more advantage of SSE2/SSSE3 optimizations, leading to a total speedup for splitMV filter of about 10%. Originally committed as revision 23853 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
4332bfbff8
commit
7c4dcf8165
|
@ -943,6 +943,39 @@ static inline void vp8_mc(VP8Context *s, int luma,
|
|||
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
|
||||
}
|
||||
|
||||
static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
|
||||
AVFrame *ref_frame, int x_off, int y_off,
|
||||
int bx_off, int by_off,
|
||||
int block_w, int block_h,
|
||||
int width, int height, VP56mv *mv)
|
||||
{
|
||||
VP56mv uvmv = *mv;
|
||||
|
||||
/* Y */
|
||||
vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off,
|
||||
ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
|
||||
block_w, block_h, width, height, s->linesize,
|
||||
s->put_pixels_tab[block_w == 8]);
|
||||
|
||||
/* U/V */
|
||||
if (s->profile == 3) {
|
||||
uvmv.x &= ~7;
|
||||
uvmv.y &= ~7;
|
||||
}
|
||||
x_off >>= 1; y_off >>= 1;
|
||||
bx_off >>= 1; by_off >>= 1;
|
||||
width >>= 1; height >>= 1;
|
||||
block_w >>= 1; block_h >>= 1;
|
||||
vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off,
|
||||
ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off,
|
||||
block_w, block_h, width, height, s->uvlinesize,
|
||||
s->put_pixels_tab[1 + (block_w == 4)]);
|
||||
vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off,
|
||||
ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
|
||||
block_w, block_h, width, height, s->uvlinesize,
|
||||
s->put_pixels_tab[1 + (block_w == 4)]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply motion vectors to prediction buffer, chapter 18.
|
||||
*/
|
||||
|
@ -951,29 +984,14 @@ static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||
{
|
||||
int x_off = mb_x << 4, y_off = mb_y << 4;
|
||||
int width = 16*s->mb_width, height = 16*s->mb_height;
|
||||
VP56mv uvmv;
|
||||
|
||||
if (mb->mode < VP8_MVMODE_SPLIT) {
|
||||
/* Y */
|
||||
vp8_mc(s, 1, dst[0], s->framep[mb->ref_frame]->data[0], &mb->mv,
|
||||
x_off, y_off, 16, 16, width, height, s->linesize,
|
||||
s->put_pixels_tab[0]);
|
||||
|
||||
/* U/V */
|
||||
uvmv = mb->mv;
|
||||
if (s->profile == 3) {
|
||||
uvmv.x &= ~7;
|
||||
uvmv.y &= ~7;
|
||||
}
|
||||
x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
|
||||
vp8_mc(s, 0, dst[1], s->framep[mb->ref_frame]->data[1], &uvmv,
|
||||
x_off, y_off, 8, 8, width, height, s->uvlinesize,
|
||||
s->put_pixels_tab[1]);
|
||||
vp8_mc(s, 0, dst[2], s->framep[mb->ref_frame]->data[2], &uvmv,
|
||||
x_off, y_off, 8, 8, width, height, s->uvlinesize,
|
||||
s->put_pixels_tab[1]);
|
||||
} else {
|
||||
vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
|
||||
0, 0, 16, 16, width, height, &mb->mv);
|
||||
} else switch (mb->partitioning) {
|
||||
case VP8_SPLITMVMODE_4x4: {
|
||||
int x, y;
|
||||
VP56mv uvmv;
|
||||
|
||||
/* Y */
|
||||
for (y = 0; y < 4; y++) {
|
||||
|
@ -1016,6 +1034,30 @@ static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||
s->put_pixels_tab[2]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VP8_SPLITMVMODE_16x8:
|
||||
vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
|
||||
0, 0, 16, 8, width, height, &mb->bmv[0]);
|
||||
vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
|
||||
0, 8, 16, 8, width, height, &mb->bmv[8]);
|
||||
break;
|
||||
case VP8_SPLITMVMODE_8x16:
|
||||
vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
|
||||
0, 0, 8, 16, width, height, &mb->bmv[0]);
|
||||
vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
|
||||
8, 0, 8, 16, width, height, &mb->bmv[2]);
|
||||
break;
|
||||
case VP8_SPLITMVMODE_8x8:
|
||||
vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
|
||||
0, 0, 8, 8, width, height, &mb->bmv[0]);
|
||||
vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
|
||||
8, 0, 8, 8, width, height, &mb->bmv[2]);
|
||||
vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
|
||||
0, 8, 8, 8, width, height, &mb->bmv[8]);
|
||||
vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
|
||||
8, 8, 8, 8, width, height, &mb->bmv[10]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -62,6 +62,13 @@ enum inter_submvmode {
|
|||
VP8_SUBMVMODE_NEW4X4
|
||||
};
|
||||
|
||||
enum inter_splitmvmode {
|
||||
VP8_SPLITMVMODE_16x8 = 0, ///< 2 16x8 blocks (vertical)
|
||||
VP8_SPLITMVMODE_8x16, ///< 2 8x16 blocks (horizontal)
|
||||
VP8_SPLITMVMODE_8x8, ///< 2x2 blocks of 8x8px each
|
||||
VP8_SPLITMVMODE_4x4, ///< 4x4 blocks of 4x4px each
|
||||
};
|
||||
|
||||
static const uint8_t vp8_pred4x4_mode[] =
|
||||
{
|
||||
[DC_PRED8x8] = DC_PRED,
|
||||
|
@ -130,10 +137,10 @@ static const uint8_t vp8_mbfirstidx[4][16] = {
|
|||
};
|
||||
|
||||
static const int8_t vp8_mbsplit_tree[3][2] = {
|
||||
{ -3, 1 }, // '0' - 16 individual MVs
|
||||
{ -2, 2 }, // '10' - quarter-based MVs
|
||||
{ -0, -1 } // '110' - top/bottom MVs,
|
||||
// '111' - left/right MVs
|
||||
{ -VP8_SPLITMVMODE_4x4, 1 }, // '0' - 16 individual MVs
|
||||
{ -VP8_SPLITMVMODE_8x8, 2 }, // '10' - quarter-based MVs
|
||||
{ -VP8_SPLITMVMODE_16x8, // '110' - top/bottom MVs
|
||||
-VP8_SPLITMVMODE_8x16 } // '111' - left/right MVs
|
||||
};
|
||||
static const uint8_t vp8_mbsplit_count[4] = { 2, 2, 4, 16 };
|
||||
static const uint8_t vp8_mbsplit_prob[3] = { 110, 111, 150 };
|
||||
|
|
Loading…
Reference in New Issue