avcodec/mpegvideoencdsp: speed up draw_edges_8_c by inlining it for all used edge widths

This commit also restricts the edge width w to 4, 8, or 16; this is checked with av_assert1.

Intel(R) Core(TM) i5-5300U CPU @ 2.30GHz:
                                    before    after
draw_edges_8_1724_4_c:             46796.5   7141.7  ( 6.55x)
draw_edges_8_1724_8_c:             43584.5   7216.5  ( 6.04x)
draw_edges_8_1724_16_c:            47007.2  10080.5  ( 4.66x)
draw_edges_128_407_4_c:            11199.0   4185.0  ( 2.68x)
draw_edges_128_407_8_c:            10660.2   4418.0  ( 2.41x)
draw_edges_128_407_16_c:           11800.2   4634.5  ( 2.55x)
draw_edges_1080_31_4_c:             1356.5    634.7  ( 2.14x)
draw_edges_1080_31_8_c:             1972.0   1430.2  ( 1.38x)
draw_edges_1080_31_16_c:            4621.0   4009.7  ( 1.15x)
draw_edges_1920_4_4_c:               834.5    795.2  ( 1.05x)
draw_edges_1920_4_4_negstride_c:     821.7    802.0  ( 1.02x)
draw_edges_1920_4_8_c:              2782.2   2650.7  ( 1.05x)
draw_edges_1920_4_8_negstride_c:    2724.7   2670.0  ( 1.02x)
draw_edges_1920_4_16_c:             6437.5   6327.7  ( 1.02x)
draw_edges_1920_4_16_negstride_c:   6395.2   6349.5  ( 1.01x)

A55:
                                    before    after
draw_edges_8_1724_4_c:             52540.4  19739.2  ( 2.66x)
draw_edges_8_1724_8_c:             45386.9  19847.4  ( 2.29x)
draw_edges_8_1724_16_c:            51995.4  23284.7  ( 2.23x)
draw_edges_128_407_4_c:            13401.1   6988.2  ( 1.92x)
draw_edges_128_407_8_c:            12218.4   7527.9  ( 1.62x)
draw_edges_128_407_16_c:           13695.9   8207.2  ( 1.67x)
draw_edges_1080_31_4_c:             3702.9   3110.4  ( 1.19x)
draw_edges_1080_31_8_c:             6015.6   5643.2  ( 1.07x)
draw_edges_1080_31_16_c:           12281.9  11901.4  ( 1.03x)
draw_edges_1920_4_4_c:              3957.9   3970.2  ( 1.00x)
draw_edges_1920_4_4_negstride_c:    3964.1   3825.2  ( 1.04x)
draw_edges_1920_4_8_c:              7757.9   7676.4  ( 1.01x)
draw_edges_1920_4_8_negstride_c:    7923.6   7812.4  ( 1.01x)
draw_edges_1920_4_16_c:            14791.6  15143.9  ( 0.98x)
draw_edges_1920_4_16_negstride_c:  14788.6  15163.4  ( 0.98x)

A76:
                                    before   after
draw_edges_8_1724_4_c:             39786.0  4968.5  ( 8.01x)
draw_edges_8_1724_8_c:             32971.5  5069.5  ( 6.50x)
draw_edges_8_1724_16_c:            40056.0  6017.2  ( 6.66x)
draw_edges_128_407_4_c:             9517.2  1210.5  ( 7.86x)
draw_edges_128_407_8_c:             8035.7  1346.2  ( 5.97x)
draw_edges_128_407_16_c:            9946.5  1648.2  ( 6.03x)
draw_edges_1080_31_4_c:             1308.0   660.7  ( 1.98x)
draw_edges_1080_31_8_c:             1785.5  1270.7  ( 1.41x)
draw_edges_1080_31_16_c:            3266.7  2591.5  ( 1.26x)
draw_edges_1920_4_4_c:              1151.0  1090.7  ( 1.06x)
draw_edges_1920_4_4_negstride_c:    1153.7  1096.5  ( 1.05x)
draw_edges_1920_4_8_c:              2220.7  2186.5  ( 1.02x)
draw_edges_1920_4_8_negstride_c:    2218.5  2193.5  ( 1.01x)
draw_edges_1920_4_16_c:             4324.2  4230.0  ( 1.02x)
draw_edges_1920_4_16_negstride_c:   4310.7  4233.0  ( 1.02x)
This commit is contained in:
Ramiro Polla 2024-08-21 16:55:55 +02:00
parent 3bfce2a104
commit 7e4784e40c

View File

@ -114,19 +114,31 @@ static int pix_norm1_c(const uint8_t *pix, int line_size)
return s;
}
/*
 * Pad every row of an 8-bit plane horizontally.
 *
 * For each of the 'height' rows starting at 'ptr', the w bytes immediately
 * before the row are filled with the row's first pixel and the w bytes
 * immediately after the row's 'width' pixels are filled with its last pixel.
 * Consecutive rows are 'wrap' bytes apart.
 *
 * The function is force-inlined so that a call with a literal 'w' gives the
 * compiler a constant memset() length.
 */
static av_always_inline void draw_edges_lr(uint8_t *ptr, int wrap, int width, int height, int w)
{
    uint8_t *row = ptr;
    int rows_left = height;

    while (rows_left-- > 0) {
        uint8_t *row_end = row + width;

        /* left margin first, then right margin (same order as before) */
        memset(row - w, row[0], w);
        memset(row_end, row_end[-1], w);

        row += wrap;
    }
}
/* draw the edges of width 'w' of an image of size width, height */
// FIXME: Check that this is OK for MPEG-4 interlaced.
static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
int w, int h, int sides)
{
uint8_t *ptr = buf, *last_line;
uint8_t *last_line;
int i;
/* left and right */
for (i = 0; i < height; i++) {
memset(ptr - w, ptr[0], w);
memset(ptr + width, ptr[width - 1], w);
ptr += wrap;
if (w == 16) {
draw_edges_lr(buf, wrap, width, height, 16);
} else if (w == 8) {
draw_edges_lr(buf, wrap, width, height, 8);
} else {
av_assert1(w == 4);
draw_edges_lr(buf, wrap, width, height, 4);
}
/* top and bottom + corners */