vda: add support for nv12 image formats

The hardware always decodes to nv12 so using this image format causes less cpu
usage than uyvy (which we are currently using, since Apple examples and other
free software use that). The reduction in cpu usage can add up to quite a bit,
especially for 4k or high fps video.

This needs an accompanying commit in libavcodec.
This commit is contained in:
Stefano Pigozzi 2015-04-11 17:17:52 +02:00
parent 5b085fd8b1
commit 5258c012fe
3 changed files with 115 additions and 21 deletions

View File

@ -31,7 +31,8 @@ static int probe(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info,
const char *decoder) const char *decoder)
{ {
hwdec_request_api(info, "vda"); hwdec_request_api(info, "vda");
if (!info || !info->hwctx)
return HWDEC_ERR_NO_CTX;
if (mp_codec_to_av_codec_id(decoder) != AV_CODEC_ID_H264) if (mp_codec_to_av_codec_id(decoder) != AV_CODEC_ID_H264)
return HWDEC_ERR_NO_CODEC; return HWDEC_ERR_NO_CODEC;
return 0; return 0;
@ -76,7 +77,14 @@ static void print_vda_error(struct mp_log *log, int lev, char *message,
static int init_decoder(struct lavc_ctx *ctx, int fmt, int w, int h) static int init_decoder(struct lavc_ctx *ctx, int fmt, int w, int h)
{ {
av_vda_default_free(ctx->avctx); av_vda_default_free(ctx->avctx);
#if HAVE_VDA_DEFAULT_INIT2
AVVDAContext *vdactx = av_vda_alloc_context();
vdactx->cv_pix_fmt_type = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
int err = av_vda_default_init2(ctx->avctx, vdactx);
#else
int err = av_vda_default_init(ctx->avctx); int err = av_vda_default_init(ctx->avctx);
#endif
if (err < 0) { if (err < 0) {
print_vda_error(ctx->log, MSGL_ERR, "failed to init VDA decoder", err); print_vda_error(ctx->log, MSGL_ERR, "failed to init VDA decoder", err);
return -1; return -1;

View File

@ -25,12 +25,62 @@
#include "video/mp_image_pool.h" #include "video/mp_image_pool.h"
#include "gl_hwdec.h" #include "gl_hwdec.h"
// Per-plane OpenGL upload parameters used when binding an IOSurface plane
// to a texture via CGLTexImageIOSurface2D.
struct vda_gl_plane_format {
GLenum gl_format;          // client pixel format (e.g. GL_RED, GL_RG, GL_RGB_422_APPLE)
GLenum gl_type;            // component type (e.g. GL_UNSIGNED_BYTE)
GLenum gl_internal_format; // texture internal format (e.g. GL_RED, GL_RG, GL_RGB)
};
// Maps one CoreVideo pixel format to the corresponding mpv image format
// and the GL parameters for each of its planes.
struct vda_format {
uint32_t cvpixfmt; // CoreVideo pixel format constant (kCVPixelFormatType_*)
int imgfmt;        // matching mpv IMGFMT_* value
int planes;        // number of planes actually used (<= MP_MAX_PLANES)
struct vda_gl_plane_format gl[MP_MAX_PLANES];
};
struct priv { struct priv {
CVPixelBufferRef pbuf; CVPixelBufferRef pbuf;
GLuint gl_texture; GLuint gl_planes[MP_MAX_PLANES];
struct mp_hwdec_ctx hwctx; struct mp_hwdec_ctx hwctx;
}; };
// Table of CVPixelBuffer formats the VDA GL interop path supports.
// UYVY is the legacy single-plane path; NV12 (bi-planar 4:2:0) is what the
// hardware decodes to natively, so it avoids a conversion step.
static struct vda_format vda_formats[] = {
{
.cvpixfmt = kCVPixelFormatType_422YpCbCr8,
.imgfmt = IMGFMT_UYVY,
.planes = 1,
.gl = {
// packed 4:2:2 uploaded as a single Apple-extension RGB texture
{ GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_APPLE, GL_RGB }
}
}, {
.cvpixfmt = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange,
.imgfmt = IMGFMT_NV12,
.planes = 2,
.gl = {
// plane 0: full-res luma (Y), plane 1: half-res interleaved chroma (CbCr)
{ GL_RED, GL_UNSIGNED_BYTE, GL_RED },
{ GL_RG, GL_UNSIGNED_BYTE, GL_RG } ,
}
}
};
static struct vda_format *vda_get_gl_format(uint32_t cvpixfmt)
{
for (int i = 0; i < MP_ARRAY_SIZE(vda_formats); i++) {
if (vda_formats[i].cvpixfmt == cvpixfmt)
return &vda_formats[i];
}
return NULL;
}
static struct vda_format *vda_get_gl_format_from_imgfmt(uint32_t imgfmt)
{
for (int i = 0; i < MP_ARRAY_SIZE(vda_formats); i++) {
if (vda_formats[i].imgfmt == imgfmt)
return &vda_formats[i];
}
return NULL;
}
static struct mp_image *download_image(struct mp_hwdec_ctx *ctx, static struct mp_image *download_image(struct mp_hwdec_ctx *ctx,
struct mp_image *hw_image, struct mp_image *hw_image,
struct mp_image_pool *swpool) struct mp_image_pool *swpool)
@ -40,16 +90,22 @@ static struct mp_image *download_image(struct mp_hwdec_ctx *ctx,
CVPixelBufferRef pbuf = (CVPixelBufferRef)hw_image->planes[3]; CVPixelBufferRef pbuf = (CVPixelBufferRef)hw_image->planes[3];
CVPixelBufferLockBaseAddress(pbuf, 0); CVPixelBufferLockBaseAddress(pbuf, 0);
void *base = CVPixelBufferGetBaseAddress(pbuf);
size_t width = CVPixelBufferGetWidth(pbuf); size_t width = CVPixelBufferGetWidth(pbuf);
size_t height = CVPixelBufferGetHeight(pbuf); size_t height = CVPixelBufferGetHeight(pbuf);
size_t stride = CVPixelBufferGetBytesPerRow(pbuf); uint32_t cvpixfmt = CVPixelBufferGetPixelFormatType(pbuf);
struct vda_format *f = vda_get_gl_format(cvpixfmt);
struct mp_image img = {0}; struct mp_image img = {0};
mp_image_setfmt(&img, IMGFMT_UYVY); mp_image_setfmt(&img, f->imgfmt);
mp_image_set_size(&img, width, height); mp_image_set_size(&img, width, height);
img.planes[0] = base;
img.stride[0] = stride; for (int i = 0; i < f->planes; i++) {
void *base = CVPixelBufferGetBaseAddressOfPlane(pbuf, i);
size_t stride = CVPixelBufferGetBytesPerRowOfPlane(pbuf, i);
img.planes[i] = base;
img.stride[i] = stride;
}
mp_image_copy_attributes(&img, hw_image); mp_image_copy_attributes(&img, hw_image);
struct mp_image *image = mp_image_pool_new_copy(swpool, &img); struct mp_image *image = mp_image_pool_new_copy(swpool, &img);
@ -81,10 +137,18 @@ static bool check_hwdec(struct gl_hwdec *hw)
static int create(struct gl_hwdec *hw) static int create(struct gl_hwdec *hw)
{ {
struct priv *p = talloc_zero(hw, struct priv); struct priv *p = talloc_zero(hw, struct priv);
hw->priv = p; hw->priv = p;
hw->converted_imgfmt = IMGFMT_UYVY;
hw->gl_texture_target = GL_TEXTURE_RECTANGLE; hw->gl_texture_target = GL_TEXTURE_RECTANGLE;
#if HAVE_VDA_DEFAULT_INIT2
struct vda_format *f = vda_get_gl_format_from_imgfmt(IMGFMT_NV12);
#else
struct vda_format *f = vda_get_gl_format_from_imgfmt(IMGFMT_UYVY);
#endif
hw->converted_imgfmt = f->imgfmt;
if (!check_hwdec(hw)) if (!check_hwdec(hw))
return -1; return -1;
@ -93,7 +157,7 @@ static int create(struct gl_hwdec *hw)
hw->hwctx->download_image = download_image; hw->hwctx->download_image = download_image;
GL *gl = hw->gl; GL *gl = hw->gl;
gl->GenTextures(1, &p->gl_texture); gl->GenTextures(MP_MAX_PLANES, p->gl_planes);
return 0; return 0;
} }
@ -118,20 +182,36 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image,
CVPixelBufferRetain(p->pbuf); CVPixelBufferRetain(p->pbuf);
IOSurfaceRef surface = CVPixelBufferGetIOSurface(p->pbuf); IOSurfaceRef surface = CVPixelBufferGetIOSurface(p->pbuf);
gl->BindTexture(hw->gl_texture_target, p->gl_texture); uint32_t cvpixfmt = CVPixelBufferGetPixelFormatType(p->pbuf);
struct vda_format *f = vda_get_gl_format(cvpixfmt);
if (!f) {
MP_ERR(hw, "CVPixelBuffer has unsupported format type\n");
return -1;
}
CGLError err = CGLTexImageIOSurface2D( const bool planar = CVPixelBufferIsPlanar(p->pbuf);
CGLGetCurrentContext(), hw->gl_texture_target, GL_RGB, const int planes = CVPixelBufferGetPlaneCount(p->pbuf);
CVPixelBufferGetWidth(p->pbuf), CVPixelBufferGetHeight(p->pbuf), assert(planar && planes == f->planes || f->planes == 1);
GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_APPLE, surface, 0);
if (err != kCGLNoError) for (int i = 0; i < f->planes; i++) {
MP_ERR(hw, "error creating IOSurface texture: %s (%x)\n", gl->BindTexture(hw->gl_texture_target, p->gl_planes[i]);
CGLErrorString(err), gl->GetError());
gl->BindTexture(hw->gl_texture_target, 0); CGLError err = CGLTexImageIOSurface2D(
CGLGetCurrentContext(), hw->gl_texture_target,
f->gl[i].gl_internal_format,
IOSurfaceGetWidthOfPlane(surface, i),
IOSurfaceGetHeightOfPlane(surface, i),
f->gl[i].gl_format, f->gl[i].gl_type, surface, i);
if (err != kCGLNoError)
MP_ERR(hw, "error creating IOSurface texture for plane %d: %s (%x)\n",
i, CGLErrorString(err), gl->GetError());
gl->BindTexture(hw->gl_texture_target, 0);
out_textures[i] = p->gl_planes[i];
}
out_textures[0] = p->gl_texture;
return 0; return 0;
} }
@ -141,8 +221,7 @@ static void destroy(struct gl_hwdec *hw)
GL *gl = hw->gl; GL *gl = hw->gl;
CVPixelBufferRelease(p->pbuf); CVPixelBufferRelease(p->pbuf);
gl->DeleteTextures(1, &p->gl_texture); gl->DeleteTextures(MP_MAX_PLANES, p->gl_planes);
p->gl_texture = 0;
} }
const struct gl_hwdec_driver gl_hwdec_vda = { const struct gl_hwdec_driver gl_hwdec_vda = {

View File

@ -702,6 +702,13 @@ hwaccel_features = [
'av_vda_alloc_context()', 'av_vda_alloc_context()',
framework='IOSurface', framework='IOSurface',
use='libav')), use='libav')),
} , {
'name': 'vda-default-init2',
'desc': 'libavcodec VDA hwaccel (configurable AVVDAContext)',
'deps': [ 'vda-hwaccel' ],
'func': check_statement('libavcodec/vda.h',
'av_vda_default_init2(NULL, NULL)',
use='libav'),
}, { }, {
'name': '--vda-gl', 'name': '--vda-gl',
'desc': 'VDA with OpenGL', 'desc': 'VDA with OpenGL',