vo_opengl: use glBufferSubData instead of glMapBufferRange

Performance seems pretty much unchanged, but I no longer get nasty spikes
on NUMA systems, probably because glBufferSubData lets the driver perform
the copy itself instead of us writing through a mapped pointer.

To simplify the code, we now always size the PBO for the full image
(stride * h), even for cropped textures. This seems to be slightly slower,
but not by a relevant amount, and it only affects cases such as --vf=crop.
It also slightly increases VRAM usage for textures with big strides.

This new code path is especially nice because it no longer depends on
GL_ARB_map_buffer_range, and no longer uses any functions that can
possibly fail. This simplifies the control flow and appears to make the
manpage's claim about possible image corruption obsolete.

In theory we could also reduce NUM_PBO_BUFFERS, since it doesn't seem
like we're streaming uploads anyway, but I'm leaving it as is just in
case some drivers disagree...
This commit is contained in:
Niklas Haas 2017-07-16 17:17:39 +02:00
parent 46bfa3726f
commit dead206873
4 changed files with 10 additions and 27 deletions

View File

@ -4136,9 +4136,6 @@ The following video options are currently all specific to ``--vo=opengl`` and
source video size is huge (e.g. so called "4K" video). On other drivers it
might be slower or cause latency issues.
In theory, this can sometimes lead to sporadic and temporary image
corruption (because reupload is not retried when it fails).
``--dither-depth=<N|no|auto>``
Set dither target depth to N. Default: no.

View File

@ -95,6 +95,7 @@ static const struct gl_functions gl_functions[] = {
DEF_FN(BindTexture),
DEF_FN(BlendFuncSeparate),
DEF_FN(BufferData),
DEF_FN(BufferSubData),
DEF_FN(Clear),
DEF_FN(ClearColor),
DEF_FN(CompileShader),

View File

@ -128,6 +128,7 @@ struct GL {
GLbitfield);
GLboolean (GLAPIENTRY *UnmapBuffer)(GLenum);
void (GLAPIENTRY *BufferData)(GLenum, intptr_t, const GLvoid *, GLenum);
void (GLAPIENTRY *BufferSubData)(GLenum, GLintptr, GLsizeiptr, const GLvoid *);
void (GLAPIENTRY *ActiveTexture)(GLenum);
void (GLAPIENTRY *BindTexture)(GLenum, GLuint);
int (GLAPIENTRY *SwapInterval)(int);

View File

@ -1310,15 +1310,16 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
assert(x >= 0 && y >= 0 && w >= 0 && h >= 0);
assert(x + w <= tex_w && y + h <= tex_h);
if (!use_pbo || !gl->MapBufferRange)
goto no_pbo;
if (!use_pbo) {
gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h);
return;
}
// We align the buffer size to 4096 to avoid possible subregion
// dependencies. This is not a strict requirement (the spec requires no
// alignment), but a good precaution for performance reasons
size_t pix_stride = gl_bytes_per_pixel(format, type);
size_t buffer_size = FFALIGN(pix_stride * tex_w * tex_h, 4096);
size_t needed_size = pix_stride * w * h;
size_t needed_size = stride * h;
size_t buffer_size = FFALIGN(needed_size, 4096);
if (buffer_size != pbo->buffer_size)
gl_pbo_upload_uninit(pbo);
@ -1345,26 +1346,9 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS;
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
void *data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, offset, needed_size,
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT);
if (!data)
goto no_pbo;
memcpy_pic(data, dataptr, pix_stride * w, h, pix_stride * w, stride);
if (!gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER)) {
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
goto no_pbo;
}
gl_upload_tex(gl, target, format, type, (void *)offset, pix_stride * w, x, y, w, h);
gl->BufferSubData(GL_PIXEL_UNPACK_BUFFER, offset, needed_size, dataptr);
gl_upload_tex(gl, target, format, type, (void *)offset, stride, x, y, w, h);
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
return;
no_pbo:
gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h);
}
void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo)