From 39d1ab82e5a3ac30e5495c6b6773823c2ff56594 Mon Sep 17 00:00:00 2001
From: xylosper <darklin20@gmail.com>
Date: Fri, 20 Sep 2013 22:55:13 +0900
Subject: [PATCH] vaapi: add vf_vavpp and use it for deinterlacing

Merged from pull request #246 by xylosper. Minor cosmetic changes, some
adjustments (compatibility with older libva versions), and manpage
additions by wm4.

Signed-off-by: wm4 <wm4@nowhere>
---
 DOCS/man/en/vf.rst      |  17 ++
 DOCS/man/en/vo.rst      |  11 +-
 Makefile                |   4 +-
 configure               |  13 +
 mpvcore/command.c       |   3 +
 video/decode/vaapi.c    |  65 +++--
 video/filter/vf.c       |   4 +
 video/filter/vf.h       |   1 +
 video/filter/vf_vavpp.c | 406 ++++++++++++++++++++++++++++++
 video/filter/vf_vo.c    |   2 +
 video/out/vo_vaapi.c    | 542 ++++++----------------------------------
 video/vaapi.c           | 520 ++++++++++++++++++++++++++++++++++++++
 video/vaapi.h           |  77 ++++--
 13 files changed, 1143 insertions(+), 522 deletions(-)
 create mode 100644 video/filter/vf_vavpp.c
 create mode 100644 video/vaapi.c

diff --git a/DOCS/man/en/vf.rst b/DOCS/man/en/vf.rst
index b7f0416ebb..6de671ea58 100644
--- a/DOCS/man/en/vf.rst
+++ b/DOCS/man/en/vf.rst
@@ -861,3 +861,20 @@ Available filters are:
 
     ``a3=<string>``
         Specify the fourth parameter to pass to the library.
+
+``vavpp``
+    VA-API video post processing. Works with ``--vo=vaapi`` only. Currently it
+    only deinterlaces. This filter is automatically inserted if deinterlacing
+    is requested (either using the ``D`` key, by default mapped to the command
+    ``cycle deinterlace``, or the ``--deinterlace`` option).
+
+    ``deint=<method>``
+        Select the deinterlacing algorithm.
+
+        no
+            Don't perform deinterlacing.
+        first-field
+            Show only first field (going by ``--field-dominance``).
+        bob
+            bob deinterlacing (default).
+
diff --git a/DOCS/man/en/vo.rst b/DOCS/man/en/vo.rst
index c57f25f125..88c1821e9c 100644
--- a/DOCS/man/en/vo.rst
+++ b/DOCS/man/en/vo.rst
@@ -701,12 +701,19 @@ Available video output drivers are:
         initially always off, and needs to be enabled with the ``D`` key
         (default key binding for ``cycle deinterlace``).
 
+        This option doesn't apply if libva supports video post processing (vpp).
+        In this case, the default for ``deint-mode`` is ``no``, and enabling
+        deinterlacing via user interaction using the methods mentioned above
+        actually inserts the ``vavpp`` video filter. If vpp is not actually
+        supported with the libva backend in use, you can use this option to
+        forcibly enable VO based deinterlacing.
+
         no
-            Don't allow deinterlacing.
+            Don't allow deinterlacing (default for newer libva).
         first-field
             Show only first field (going by ``--field-dominance``).
         bob
-            bob deinterlacing (default).
+            bob deinterlacing (default for older libva).
 
     ``scaled-osd=<yes|no>``
         If enabled, then the OSD is rendered at video resolution and scaled to
diff --git a/Makefile b/Makefile
index e00debdfbc..5199c7d28b 100644
--- a/Makefile
+++ b/Makefile
@@ -112,7 +112,9 @@ SOURCES-$(VDA)                  += video/decode/vda.c
 SOURCES-$(VDPAU_DEC)            += video/decode/vdpau.c
 SOURCES-$(VDPAU_DEC_OLD)        += video/decode/vdpau_old.c
 SOURCES-$(VAAPI)                += video/out/vo_vaapi.c \
-                                   video/decode/vaapi.c
+                                   video/decode/vaapi.c \
+                                   video/vaapi.c
+SOURCES-$(VAAPI_VPP)            += video/filter/vf_vavpp.c
 
 SOURCES-$(X11)                  += video/out/vo_x11.c video/out/x11_common.c
 SOURCES-$(XV)                   += video/out/vo_xv.c
diff --git a/configure b/configure
index 8304d46fda..fbffd008e9 100755
--- a/configure
+++ b/configure
@@ -1869,6 +1869,8 @@ echores "$_vdpau"
 
 
 echocheck "VAAPI"
+_vaapi_vpp=no
+def_vaapi_vpp='#define CONFIG_VAAPI_VPP 0'
 if test "$_vaapi" = auto && test "$_x11" = yes ; then
   _vaapi=no
   if test "$_dl" = yes ; then
@@ -1884,6 +1886,15 @@ else
 fi
 echores "$_vaapi"
 
+if test "$_vaapi" = yes ; then
+  echocheck "VAAPI VPP"
+  if pkg-config 'libva >= 0.34.0' ; then
+    _vaapi_vpp=yes
+    def_vaapi_vpp='#define CONFIG_VAAPI_VPP 1'
+  fi
+  echores "$_vaapi_vpp"
+fi
+
 
 echocheck "Xinerama"
 if test "$_xinerama" = auto && test "$_x11" = yes ; then
@@ -3173,6 +3184,7 @@ VDPAU_DEC_OLD = $_vdpau_dec_old
 VDA = $_vda
 VDA_REFCOUNTING = $_vda_refcounting
 VAAPI = $_vaapi
+VAAPI_VPP = $_vaapi_vpp
 WIN32 = $_win32
 X11 = $_x11
 WAYLAND = $_wayland
@@ -3351,6 +3363,7 @@ $def_vdpau
 $def_vda
 $def_vda_refcounting
 $def_vaapi
+$def_vaapi_vpp
 $def_vm
 $def_x11
 $def_wayland
diff --git a/mpvcore/command.c b/mpvcore/command.c
index b451669f12..b30a5299e0 100644
--- a/mpvcore/command.c
+++ b/mpvcore/command.c
@@ -1140,6 +1140,9 @@ static const char *deint_filters[] = {
     "lavfi=yadif",
 #endif
     "yadif",
+#if CONFIG_VAAPI_VPP
+    "vavpp",
+#endif
     NULL
 };
 
diff --git a/video/decode/vaapi.c b/video/decode/vaapi.c
index 9064389afe..8bec94d950 100644
--- a/video/decode/vaapi.c
+++ b/video/decode/vaapi.c
@@ -31,6 +31,7 @@
 #include "video/fmt-conversion.h"
 #include "video/vaapi.h"
 #include "video/decode/dec_video.h"
+#include "video/filter/vf.h"
 
 /*
  * The VAAPI decoder can work only with surfaces passed to the decoder at
@@ -60,6 +61,9 @@ struct priv {
 
     int format, w, h;
     VASurfaceID surfaces[MAX_SURFACES];
+
+    struct va_surface_pool *pool;
+    int rt_format;
 };
 
 struct profile_entry {
@@ -159,36 +163,23 @@ static int is_direct_mapping(VADisplay display)
     return 0;
 }
 
-// Make vo_vaapi.c pool the required number of surfaces.
-// This is very touchy: vo_vaapi.c must not free surfaces while we decode,
-// and we must allocate only surfaces that were passed to the decoder on
-// creation.
-// We achieve this by deleting all previous surfaces, then allocate every
-// surface needed. Then we free these surfaces, and rely on the fact that
-// vo_vaapi.c keeps the released surfaces in the pool, and only allocates
-// new surfaces out of that pool.
-static int preallocate_surfaces(struct lavc_ctx *ctx, int va_rt_format, int num)
+// We must allocate only surfaces that were passed to the decoder on creation.
+// We achieve this by reserving surfaces in the pool as needed.
+// Releasing surfaces is necessary after filling the surface id list so
+// that reserved surfaces can be reused for decoding.
+static bool preallocate_surfaces(struct lavc_ctx *ctx, int num)
 {
     struct priv *p = ctx->hwdec_priv;
-    int res = -1;
-
-    struct mp_image *tmp_surfaces[MAX_SURFACES] = {0};
-
-    p->ctx->flush(p->ctx); // free previously allocated surfaces
-
-    for (int n = 0; n < num; n++) {
-        tmp_surfaces[n] = p->ctx->get_surface(p->ctx, va_rt_format, p->format,
-                                              p->w, p->h);
-        if (!tmp_surfaces[n])
-            goto done;
-        p->surfaces[n] = (uintptr_t)tmp_surfaces[n]->planes[3];
+    if (!va_surface_pool_reserve(p->pool, num, p->w, p->h)) {
+        mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Could not allocate surfaces.\n");
+        return false;
     }
-    res = 0;
-
-done:
-    for (int n = 0; n < num; n++)
-        talloc_free(tmp_surfaces[n]);
-    return res;
+    for (int i = 0; i < num; i++) {
+        struct va_surface *s = va_surface_pool_get(p->pool, p->w, p->h);
+        p->surfaces[i] = s->id;
+        va_surface_release(s);
+    }
+    return true;
 }
 
 static void destroy_decoder(struct lavc_ctx *ctx)
@@ -274,7 +265,7 @@ static int create_decoder(struct lavc_ctx *ctx)
         goto error;
     }
 
-    if (preallocate_surfaces(ctx, VA_RT_FORMAT_YUV420, num_surfaces) < 0) {
+    if (!preallocate_surfaces(ctx, num_surfaces)) {
         mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Could not allocate surfaces.\n");
         goto error;
     }
@@ -298,7 +289,7 @@ static int create_decoder(struct lavc_ctx *ctx)
                                    &attrib, 1);
     if (!check_va_status(status, "vaGetConfigAttributes()"))
         goto error;
-    if ((attrib.value & VA_RT_FORMAT_YUV420) == 0) {
+    if ((attrib.value & p->rt_format) == 0) {
         mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Chroma format not supported.\n");
         goto error;
     }
@@ -339,14 +330,13 @@ static struct mp_image *allocate_image(struct lavc_ctx *ctx, int format,
             return NULL;
     }
 
-    struct mp_image *img = p->ctx->get_surface(p->ctx, VA_RT_FORMAT_YUV420,
-                                               format, p->w, p->h);
-    if (img) {
+    struct va_surface *s = va_surface_pool_get(p->pool, p->w, p->h);
+    if (s) {
         for (int n = 0; n < MAX_SURFACES; n++) {
-            if (p->surfaces[n] == (uintptr_t)img->planes[3])
-                return img;
+            if (p->surfaces[n] == s->id)
+                return va_surface_wrap(s);
         }
-        talloc_free(img);
+        va_surface_release(s);
     }
     mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] Insufficient number of surfaces.\n");
     return NULL;
@@ -361,6 +351,7 @@ static void uninit(struct lavc_ctx *ctx)
 
     destroy_decoder(ctx);
 
+    va_surface_pool_release(p->pool);
     talloc_free(p);
     ctx->hwdec_priv = NULL;
 }
@@ -371,16 +362,18 @@ static int init(struct lavc_ctx *ctx)
     *p = (struct priv) {
         .ctx = ctx->hwdec_info->vaapi_ctx,
         .va_context = &p->va_context_storage,
+        .rt_format = VA_RT_FORMAT_YUV420
     };
-    ctx->hwdec_priv = p;
 
     p->display = p->ctx->display;
+    p->pool = va_surface_pool_alloc(p->display, p->rt_format);
 
     p->va_context->display = p->display;
     p->va_context->config_id = VA_INVALID_ID;
     p->va_context->context_id = VA_INVALID_ID;
 
     ctx->avctx->hwaccel_context = p->va_context;
+    ctx->hwdec_priv = p;
 
     return 0;
 }
diff --git a/video/filter/vf.c b/video/filter/vf.c
index 56ddd32fd8..216f3c42f4 100644
--- a/video/filter/vf.c
+++ b/video/filter/vf.c
@@ -70,6 +70,7 @@ extern const vf_info_t vf_info_yadif;
 extern const vf_info_t vf_info_stereo3d;
 extern const vf_info_t vf_info_dlopen;
 extern const vf_info_t vf_info_lavfi;
+extern const vf_info_t vf_info_vaapi;
 
 // list of available filters:
 static const vf_info_t *const filter_list[] = {
@@ -111,6 +112,9 @@ static const vf_info_t *const filter_list[] = {
     &vf_info_stereo3d,
 #ifdef CONFIG_DLOPEN
     &vf_info_dlopen,
+#endif
+#if CONFIG_VAAPI_VPP
+    &vf_info_vaapi,
 #endif
     NULL
 };
diff --git a/video/filter/vf.h b/video/filter/vf.h
index 730b0e0da0..ea1246da17 100644
--- a/video/filter/vf.h
+++ b/video/filter/vf.h
@@ -106,6 +106,7 @@ typedef struct vf_seteq {
 /* Hack to make the OSD state object available to vf_sub which
  * access OSD/subtitle state outside of normal OSD draw time. */
 #define VFCTRL_SET_OSD_OBJ 20
+#define VFCTRL_GET_HWDEC_INFO 21 // for hwdec filters
 
 int vf_control(struct vf_instance *vf, int cmd, void *arg);
 
diff --git a/video/filter/vf_vavpp.c b/video/filter/vf_vavpp.c
new file mode 100644
index 0000000000..ba4e9ca411
--- /dev/null
+++ b/video/filter/vf_vavpp.c
@@ -0,0 +1,406 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <va/va.h>
+#include <va/va_vpp.h>
+
+#include "config.h"
+#include "mpvcore/options.h"
+#include "vf.h"
+#include "video/vaapi.h"
+#include "video/decode/dec_video.h"
+
+static inline bool is_success(VAStatus status, const char *msg)
+{
+    if (status == VA_STATUS_SUCCESS)
+        return true;
+    mp_msg(MSGT_VFILTER, MSGL_ERR, "[vavpp] %s: %s\n", msg, vaErrorStr(status));
+    return false;
+}
+
+struct surface_refs {
+    VASurfaceID *surfaces;
+    int num_allocated;
+    int num_required;
+};
+
+struct pipeline {
+    VABufferID *filters;
+    int num_filters;
+    VAProcColorStandardType input_colors[VAProcColorStandardCount];
+    VAProcColorStandardType output_colors[VAProcColorStandardCount];
+    int num_input_colors, num_output_colors;
+    struct surface_refs forward, backward;
+};
+
+struct vf_priv_s {
+    double prev_pts; // pts of the previous input frame (reset by reconfig)
+    int deint_type; // "deint" option: 0: none, 1: discard, 2: double fps
+    bool do_deint; // runtime toggle (VFCTRL_SET_DEINTERLACE)
+    VABufferID buffers[VAProcFilterCount]; // filter buffers from initialize()
+    int num_buffers; // number of valid entries in buffers
+    VAConfigID config; // VA_INVALID_ID until initialize() creates it
+    VAContextID context; // VA_INVALID_ID until initialize() creates it
+    struct mp_image_params params; // input parameters saved by reconfig()
+    VADisplay display; // copied from va->display in vf_open()
+    struct mp_vaapi_ctx *va; // obtained via VFCTRL_GET_HWDEC_INFO
+    struct pipeline pipe; // filter subset last set up by update_pipeline()
+    struct va_surface_pool *pool; // created in filter_ext() per rt_format
+};
+
+static const struct vf_priv_s vf_priv_default = {
+    .prev_pts = MP_NOPTS_VALUE,
+    .config = VA_INVALID_ID,
+    .context = VA_INVALID_ID,
+    .deint_type = 2,
+};
+
+static inline void realloc_refs(struct surface_refs *refs, int num)
+{
+    if (refs->num_allocated < num) {
+        refs->surfaces = realloc(refs->surfaces, sizeof(VASurfaceID)*num);
+        refs->num_allocated = num;
+    }
+    refs->num_required = num;
+}
+
+static bool update_pipeline(struct vf_priv_s *p, bool deint)
+{
+    VABufferID *filters = p->buffers;
+    int num_filters = p->num_buffers;
+    if (p->deint_type && !deint) {
+        ++filters;
+        --num_filters;
+    }
+    if (filters == p->pipe.filters && num_filters == p->pipe.num_filters)
+        return true;
+    p->pipe.forward.num_required = p->pipe.backward.num_required = 0;
+    p->pipe.num_input_colors = p->pipe.num_output_colors = 0;
+    p->pipe.num_filters = 0;
+    p->pipe.filters = NULL;
+    if (!num_filters)
+        return false;
+    VAProcPipelineCaps caps;
+    caps.input_color_standards = p->pipe.input_colors;
+    caps.output_color_standards = p->pipe.output_colors;
+    caps.num_input_color_standards = VAProcColorStandardCount;
+    caps.num_output_color_standards = VAProcColorStandardCount;
+    VAStatus status = vaQueryVideoProcPipelineCaps(p->display, p->context,
+                                                   filters, num_filters, &caps);
+    if (!is_success(status, "vaQueryVideoProcPipelineCaps()"))
+        return false;
+    p->pipe.filters = filters;
+    p->pipe.num_filters = num_filters;
+    p->pipe.num_input_colors = caps.num_input_color_standards;
+    p->pipe.num_output_colors = caps.num_output_color_standards;
+    realloc_refs(&p->pipe.forward, caps.num_forward_references);
+    realloc_refs(&p->pipe.backward, caps.num_backward_references);
+    return true;
+}
+
+static inline int get_deint_field(struct vf_priv_s *p, int i,
+                                  const struct mp_image *mpi)
+{
+    if (!p->do_deint || !(mpi->fields & MP_IMGFIELD_INTERLACED))
+        return VA_FRAME_PICTURE;
+    return !!(mpi->fields & MP_IMGFIELD_TOP_FIRST) ^ i ? VA_TOP_FIELD : VA_BOTTOM_FIELD;
+}
+
+static struct mp_image *render(struct vf_priv_s *p, struct va_surface *in,
+                               unsigned int flags)
+{
+    if (!p->pipe.filters || !in) // no active pipeline or no input surface
+        return NULL;
+    struct va_surface *out = va_surface_pool_get(p->pool, in->w, in->h);
+    if (!out)
+        return NULL;
+    enum {Begun = 1, Rendered = 2};
+    int state = 0; // tracks how far we got, for the cleanup below
+    do { // not a loop, just for break
+        VAStatus status = vaBeginPicture(p->display, p->context, out->id);
+        if (!is_success(status, "vaBeginPicture()"))
+            break;
+        state |= Begun;
+        VABufferID buffer = VA_INVALID_ID;
+        VAProcPipelineParameterBuffer *param = NULL;
+        status = vaCreateBuffer(p->display, p->context,
+                                VAProcPipelineParameterBufferType,
+                                sizeof(*param), 1, NULL, &buffer);
+        if (!is_success(status, "vaCreateBuffer()"))
+            break;
+        status = vaMapBuffer(p->display, buffer, (void**)&param);
+        if (!is_success(status, "vaMapBuffer()"))
+            break;
+        param->surface = in->id;
+        param->surface_region = NULL;
+        param->output_region = NULL;
+        param->output_background_color = 0;
+        param->filter_flags = flags;
+        param->filters = p->pipe.filters;
+        param->num_filters = p->pipe.num_filters;
+        param->forward_references = p->pipe.forward.surfaces;
+        param->backward_references = p->pipe.backward.surfaces;
+        param->num_forward_references = p->pipe.forward.num_required;
+        param->num_backward_references = p->pipe.backward.num_required;
+        vaUnmapBuffer(p->display, buffer); // param must not be written after
+        status = vaRenderPicture(p->display, p->context, &buffer, 1);
+        if (!is_success(status, "vaRenderPicture()"))
+            break;
+        state |= Rendered;
+    } while (false);
+    if (state & Begun) // every successful vaBeginPicture() is ended
+        vaEndPicture(p->display, p->context);
+    if (state & Rendered)
+        return va_surface_wrap(out); // success: hand 'out' over as mp_image
+    va_surface_release(out); // failure: give the surface back to the pool
+    return NULL;
+}
+
+// return value: the number of created images
+static int process(struct vf_priv_s *p, struct mp_image *in,
+                   struct mp_image **out1, struct mp_image **out2)
+{
+    const bool deint = p->do_deint && p->deint_type > 0;
+    if (!update_pipeline(p, deint) || !p->pipe.filters) // no filtering
+        return 0;
+    struct va_surface *surface = va_surface_in_mp_image(in);
+    const unsigned int csp = va_get_colorspace_flag(p->params.colorspace);
+    const unsigned int field = get_deint_field(p, 0, in);
+    *out1 = render(p, surface, field | csp);
+    if (!*out1) // cannot render
+        return 0;
+    mp_image_copy_attributes(*out1, in);
+    if (field == VA_FRAME_PICTURE || p->deint_type < 2) // first-field only
+        return 1;
+    const double add = (in->pts - p->prev_pts)*0.5;
+    if (p->prev_pts == MP_NOPTS_VALUE || add <= 0.0 || add > 0.5) // no pts, skip it
+        return 1;
+    *out2 = render(p, surface, get_deint_field(p, 1, in) | csp);
+    if (!*out2) // cannot render
+        return 1;
+    mp_image_copy_attributes(*out2, in);
+    (*out2)->pts = in->pts + add;
+    return 2;
+}
+
+static struct mp_image *upload(struct vf_priv_s *p, struct mp_image *in)
+{
+    struct va_surface *surface =
+        va_surface_pool_get_by_imgfmt(p->pool, p->va->image_formats, in->imgfmt, in->w, in->h);
+    if (!surface)
+        surface = va_surface_pool_get(p->pool, in->w, in->h); // dummy
+    else
+        va_surface_upload(surface, in);
+    struct mp_image *out = va_surface_wrap(surface);
+    mp_image_copy_attributes(out, in);
+    return out;
+}
+
+static int filter_ext(struct vf_instance *vf, struct mp_image *in)
+{
+    struct vf_priv_s *p = vf->priv;
+    struct va_surface *surface = va_surface_in_mp_image(in);
+    const int rt_format = surface ? surface->rt_format : VA_RT_FORMAT_YUV420;
+    if (!p->pool || va_surface_pool_rt_format(p->pool) != rt_format) {
+        va_surface_pool_release(p->pool);
+        p->pool = va_surface_pool_alloc(p->display, rt_format);
+    }
+    if (!surface) {
+        struct mp_image *tmp = upload(p, in);
+        talloc_free(in);
+        in = tmp;
+    }
+
+    struct mp_image *out1, *out2;
+    const double pts = in->pts;
+    const int num = process(p, in, &out1, &out2);
+    if (!num)
+        vf_add_output_frame(vf, in);
+    else {
+        vf_add_output_frame(vf, out1);
+        if (num > 1)
+            vf_add_output_frame(vf, out2);
+        talloc_free(in);
+    }
+    p->prev_pts = pts;
+    return 0;
+}
+
+static int reconfig(struct vf_instance *vf, struct mp_image_params *params,
+                    int flags)
+{
+    struct vf_priv_s *p = vf->priv;
+
+    p->prev_pts = MP_NOPTS_VALUE;
+    p->params = *params;
+    params->imgfmt = IMGFMT_VAAPI;
+    return vf_next_reconfig(vf, params, flags);
+}
+
+static void uninit(struct vf_instance *vf)
+{
+    struct vf_priv_s *p = vf->priv;
+    for (int i=0; i<p->num_buffers; ++i)
+        vaDestroyBuffer(p->display, p->buffers[i]);
+    if (p->context != VA_INVALID_ID)
+        vaDestroyContext(p->display, p->context);
+    if (p->config != VA_INVALID_ID)
+        vaDestroyConfig(p->display, p->config);
+    free(p->pipe.forward.surfaces);
+    free(p->pipe.backward.surfaces);
+    va_surface_pool_release(p->pool);
+}
+
+static int query_format(struct vf_instance *vf, unsigned int imgfmt)
+{
+    struct vf_priv_s *p = vf->priv;
+    if (IMGFMT_IS_VAAPI(imgfmt) || va_image_format_from_imgfmt(p->va->image_formats, imgfmt))
+        return vf_next_query_format(vf, IMGFMT_VAAPI);
+    return 0;
+}
+
+static int control(struct vf_instance *vf, int request, void* data)
+{
+    struct vf_priv_s *p = vf->priv;
+    switch (request){
+    case VFCTRL_GET_DEINTERLACE:
+        *(int*)data = !!p->do_deint;
+        return true;
+    case VFCTRL_SET_DEINTERLACE:
+        p->do_deint = *(int*)data;
+        return true;
+    default:
+        return vf_next_control (vf, request, data);
+    }
+}
+
+static int va_query_filter_caps(struct vf_priv_s *p, VAProcFilterType type,
+                                void *caps, unsigned int count)
+{
+    VAStatus status = vaQueryVideoProcFilterCaps(p->display, p->context, type,
+                                                 caps, &count);
+    return is_success(status, "vaQueryVideoProcFilterCaps()") ? count : 0;
+}
+
+static VABufferID va_create_filter_buffer(struct vf_priv_s *p, int bytes,
+                                          int num, void *data)
+{
+    VABufferID buffer;
+    VAStatus status = vaCreateBuffer(p->display, p->context,
+                                     VAProcFilterParameterBufferType,
+                                     bytes, num, data, &buffer);
+    return is_success(status, "vaCreateBuffer()") ? buffer : VA_INVALID_ID;
+}
+
+static bool initialize(struct vf_priv_s *p)
+{
+    VAStatus status;
+
+    VAConfigID config;
+    status = vaCreateConfig(p->display, VAProfileNone, VAEntrypointVideoProc,
+                            NULL, 0, &config);
+    if (!is_success(status, "vaCreateConfig()")) // no entrypoint for video proc
+        return false;
+    p->config = config;
+
+    VAContextID context;
+    status = vaCreateContext(p->display, p->config, 0, 0, 0, NULL, 0, &context);
+    if (!is_success(status, "vaCreateContext()"))
+        return false;
+    p->context = context;
+
+    VAProcFilterType filters[VAProcFilterCount];
+    int num_filters = VAProcFilterCount;
+    status = vaQueryVideoProcFilters(p->display, p->context, filters, &num_filters);
+    if (!is_success(status, "vaQueryVideoProcFilters()"))
+        return false;
+
+    VABufferID buffers[VAProcFilterCount]; // indexed by VAProcFilterType
+    for (int i=0; i<VAProcFilterCount; ++i)
+        buffers[i] = VA_INVALID_ID;
+    for (int i=0; i<num_filters; ++i) {
+        if (filters[i] == VAProcFilterDeinterlacing) {
+            if (!p->deint_type) // deinterlacing disabled by option
+                continue;
+            VAProcFilterCapDeinterlacing caps[VAProcDeinterlacingCount];
+            int num = va_query_filter_caps(p, VAProcFilterDeinterlacing, caps,
+                                           VAProcDeinterlacingCount);
+            if (!num)
+                continue;
+            VAProcDeinterlacingType algorithm = VAProcDeinterlacingBob;
+            for (int j=0; j<num; ++j) { // find Bob
+                if (caps[j].type != algorithm)
+                    continue;
+                VAProcFilterParameterBufferDeinterlacing param = {0};
+                param.type = VAProcFilterDeinterlacing;
+                param.algorithm = algorithm;
+                buffers[VAProcFilterDeinterlacing] =
+                    va_create_filter_buffer(p, sizeof(param), 1, &param);
+            }
+        } // check other filters
+    }
+    p->num_buffers = 0;
+    if (buffers[VAProcFilterDeinterlacing] != VA_INVALID_ID)
+        p->buffers[p->num_buffers++] = buffers[VAProcFilterDeinterlacing];
+    else
+        p->deint_type = 0; // no usable Bob buffer: turn deinterlacing off
+    p->do_deint = !!p->deint_type; // starts enabled iff a method is selected
+    // next filters: p->buffers[p->num_buffers++] = buffers[next_filter];
+    return true;
+}
+
+static int vf_open(vf_instance_t *vf, char *args)
+{
+    vf->reconfig = reconfig;
+    vf->filter_ext = filter_ext;
+    vf->query_format = query_format;
+    vf->uninit = uninit;
+    vf->control = control;
+
+    struct vf_priv_s *p = vf->priv;
+    struct mp_hwdec_info hwdec;
+    if (vf_control(vf->next, VFCTRL_GET_HWDEC_INFO, &hwdec) <= 0)
+        return false;
+    p->va = hwdec.vaapi_ctx;
+    if (!p->va || !p->va->display)
+        return false;
+    p->display = p->va->display;
+    if (initialize(p))
+        return true;
+    uninit(vf);
+    return false;
+}
+
+#define OPT_BASE_STRUCT struct vf_priv_s
+static const m_option_t vf_opts_fields[] = {
+    OPT_CHOICE("deint", deint_type, 0,
+               ({"no", 0},
+                {"first-field", 1},
+                {"bob", 2})),
+    {0}
+};
+
+const vf_info_t vf_info_vaapi = {
+    .info = "VA-API Video Post-Process Filter",
+    .name = "vavpp",
+    .author = "xylosper",
+    .comment = "",
+    .vf_open = vf_open,
+    .priv_size = sizeof(struct vf_priv_s),
+    .priv_defaults = &vf_priv_default,
+    .options = vf_opts_fields,
+};
diff --git a/video/filter/vf_vo.c b/video/filter/vf_vo.c
index 05f835d5fd..cccfb45fc2 100644
--- a/video/filter/vf_vo.c
+++ b/video/filter/vf_vo.c
@@ -83,6 +83,8 @@ static int control(struct vf_instance *vf, int request, void *data)
         };
         return vo_control(video_out, VOCTRL_GET_EQUALIZER, &param) == VO_TRUE;
     }
+    case VFCTRL_GET_HWDEC_INFO:
+        return vo_control(video_out, VOCTRL_GET_HWDEC_INFO, data) == VO_TRUE;
     }
     return CONTROL_UNKNOWN;
 }
diff --git a/video/out/vo_vaapi.c b/video/out/vo_vaapi.c
index d7947cda20..dd7f138a24 100644
--- a/video/out/vo_vaapi.c
+++ b/video/out/vo_vaapi.c
@@ -43,22 +43,6 @@
 #include "video/vaapi.h"
 #include "video/decode/dec_video.h"
 
-#define STR_FOURCC(fcc) \
-    (const char[]){(fcc), (fcc) >> 8u, (fcc) >> 16u, (fcc) >> 24u, 0}
-
-struct vaapi_surface {
-    VASurfaceID id;       // VA_INVALID_ID if unallocated
-    int w, h, va_format;  // parameters of allocated image (0/0/-1 unallocated)
-    VAImage     image;    // used for sofwtare decoding case
-    bool        is_bound; // image bound to the surface?
-    bool        is_used;  // referenced by a mp_image
-    bool        is_dead;  // used, but deallocate VA objects as soon as possible
-    int         order;    // for LRU allocation
-
-    // convenience shortcut for mp_image deallocation callback
-    struct priv *p;
-};
-
 struct vaapi_osd_image {
     int            w, h;
     VAImage        image;
@@ -86,7 +70,7 @@ struct priv {
     struct mp_log           *log;
     struct vo               *vo;
     VADisplay                display;
-    struct mp_vaapi_ctx      mpvaapi;
+    struct mp_vaapi_ctx     *mpvaapi;
 
     struct mp_image_params   image_params;
     struct mp_rect           src_rect;
@@ -98,21 +82,18 @@ struct priv {
 
     int                      output_surface;
     int                      visible_surface;
-    int                      deint;
-    int                      deint_type;
     int                      scaling;
     int                      force_scaled_osd;
+    // with old libva versions only
+    int                      deint;
+    int                      deint_type;
 
     VAImageFormat            osd_format; // corresponds to OSD_VA_FORMAT
     struct vaapi_osd_part    osd_parts[MAX_OSD_PARTS];
     bool                     osd_screen;
 
-    int                      num_video_surfaces;
-    struct vaapi_surface   **video_surfaces;
-    int                      video_surface_lru_counter;
-
-    VAImageFormat           *va_image_formats;
-    int                      va_num_image_formats;
+    struct va_surface_pool  *pool;
+    struct va_image_formats *va_image_formats;
     VAImageFormat           *va_subpic_formats;
     unsigned int            *va_subpic_flags;
     int                      va_num_subpic_formats;
@@ -128,187 +109,10 @@ static const bool osd_formats[SUBBITMAP_COUNT] = {
     [SUBBITMAP_RGBA] = true,
 };
 
-struct fmtentry {
-    uint32_t va;
-    int mp;
-};
-static struct fmtentry va_to_imgfmt[] = {
-    {VA_FOURCC('Y','V','1','2'), IMGFMT_420P},
-    {VA_FOURCC('I','4','2','0'), IMGFMT_420P},
-    {VA_FOURCC('I','Y','U','V'), IMGFMT_420P},
-    {VA_FOURCC('N','V','1','2'), IMGFMT_NV12},
-    // Note: not sure about endian issues (the mp formats are byte-addressed)
-    {VA_FOURCC_RGBA,             IMGFMT_RGBA},
-    {VA_FOURCC_BGRA,             IMGFMT_BGRA},
-    // Untested.
-    //{VA_FOURCC_UYVY,             IMGFMT_UYVY},
-    //{VA_FOURCC_YUY2,             IMGFMT_YUYV},
-    {0}
-};
-
-
-static int va_fourcc_to_imgfmt(uint32_t fourcc)
-{
-    for (int n = 0; va_to_imgfmt[n].mp; n++) {
-        if (va_to_imgfmt[n].va == fourcc)
-            return va_to_imgfmt[n].mp;
-    }
-    return 0;
-}
-
-static VAImageFormat *VAImageFormat_from_imgfmt(struct priv *p, int format)
-{
-    for (int i = 0; i < p->va_num_image_formats; i++) {
-        if (va_fourcc_to_imgfmt(p->va_image_formats[i].fourcc) == format)
-            return &p->va_image_formats[i];
-    }
-    return NULL;
-}
-
-static struct vaapi_surface *to_vaapi_surface(struct priv *p,
-                                              struct mp_image *img)
-{
-    if (!img || !IMGFMT_IS_VAAPI(img->imgfmt))
-        return NULL;
-    // Note: we _could_ use planes[1] or planes[2] to store a vaapi_surface
-    //       pointer, but I just don't trust libavcodec enough.
-    VASurfaceID id = (uintptr_t)img->planes[3];
-    for (int n = 0; n < p->num_video_surfaces; n++) {
-        struct vaapi_surface *s = p->video_surfaces[n];
-        if (s->id == id)
-            return s;
-    }
-    return NULL;
-}
-
-static struct vaapi_surface *alloc_vaapi_surface(struct priv *p, int w, int h,
-                                                 int va_format)
-{
-    VAStatus status;
-
-    VASurfaceID id = VA_INVALID_ID;
-    status = vaCreateSurfaces(p->display, w, h, va_format, 1, &id);
-    if (!check_va_status(status, "vaCreateSurfaces()"))
-        return NULL;
-
-    struct vaapi_surface *surface = NULL;
-    for (int n = 0; n < p->num_video_surfaces; n++) {
-        struct vaapi_surface *s = p->video_surfaces[n];
-        if (s->id == VA_INVALID_ID) {
-            surface = s;
-            break;
-        }
-    }
-    if (!surface) {
-        surface = talloc_ptrtype(NULL, surface);
-        MP_TARRAY_APPEND(p, p->video_surfaces, p->num_video_surfaces, surface);
-    }
-
-    *surface = (struct vaapi_surface) {
-        .id = id,
-        .image = { .image_id = VA_INVALID_ID, .buf = VA_INVALID_ID },
-        .w = w,
-        .h = h,
-        .va_format = va_format,
-        .p = p,
-    };
-    return surface;
-}
-
-static void destroy_vaapi_surface(struct priv *p, struct vaapi_surface *s)
-{
-    if (!s || s->id == VA_INVALID_ID)
-        return;
-    assert(!s->is_used);
-
-    if (s->image.image_id != VA_INVALID_ID)
-        vaDestroyImage(p->display, s->image.image_id);
-    vaDestroySurfaces(p->display, &s->id, 1);
-    s->id = VA_INVALID_ID;
-    s->w = 0;
-    s->h = 0;
-    s->va_format = -1;
-}
-
-static struct vaapi_surface *get_vaapi_surface(struct priv *p, int w, int h,
-                                               int va_format)
-{
-    struct vaapi_surface *best = NULL;
-
-    for (int n = 0; n < p->num_video_surfaces; n++) {
-        struct vaapi_surface *s = p->video_surfaces[n];
-        if (!s->is_used && s->w == w && s->h == h && s->va_format == va_format) {
-            if (!best || best->order > s->order)
-                best = s;
-        }
-    }
-
-    if (!best)
-        best = alloc_vaapi_surface(p, w, h, va_format);
-
-    if (best) {
-        best->is_used = true;
-        best->order = ++p->video_surface_lru_counter;
-    }
-    return best;
-}
-
-static void release_video_surface(void *ptr)
-{
-    struct vaapi_surface *surface = ptr;
-    surface->is_used = false;
-    if (surface->is_dead)
-        destroy_vaapi_surface(surface->p, surface);
-}
-
-static struct mp_image *get_surface(struct mp_vaapi_ctx *ctx, int va_rt_format,
-                                    int mp_format, int w, int h)
-{
-    assert(IMGFMT_IS_VAAPI(mp_format));
-
-    struct vo *vo = ctx->priv;
-    struct priv *p = vo->priv;
-
-    struct mp_image img = {0};
-    mp_image_setfmt(&img, mp_format);
-    mp_image_set_size(&img, w, h);
-
-    struct vaapi_surface *surface = get_vaapi_surface(p, w, h, va_rt_format);
-    if (!surface)
-        return NULL;
-
-    // libavcodec probably wants it at [0] and [3]
-    // [1] and [2] are possibly free for own use.
-    for (int n = 0; n < 4; n++)
-        img.planes[n] = (void *)(uintptr_t)surface->id;
-
-    return mp_image_new_custom_ref(&img, surface, release_video_surface);
-}
-
-// This should be called only by code that is going to preallocate surfaces
-// (and by uninit). Otherwise, hw decoder init might get confused by
-// accidentally releasing hw decoder preallocated surfaces.
-static void flush_surfaces(struct mp_vaapi_ctx *ctx)
-{
-    struct vo *vo = ctx->priv;
-    struct priv *p = vo->priv;
-
-    for (int n = 0; n < p->num_video_surfaces; n++) {
-        struct vaapi_surface *s = p->video_surfaces[n];
-        if (s->is_used) {
-            s->is_dead = true;
-        } else {
-            destroy_vaapi_surface(p, s);
-        }
-    }
-}
-
 static void flush_output_surfaces(struct priv *p)
 {
-    for (int n = 0; n < MAX_OUTPUT_SURFACES; n++) {
-        talloc_free(p->output_surfaces[n]);
-        p->output_surfaces[n] = NULL;
-    }
+    for (int n = 0; n < MAX_OUTPUT_SURFACES; n++)
+        mp_image_unrefp(&p->output_surfaces[n]);
     p->output_surface = 0;
     p->visible_surface = 0;
 }
@@ -318,60 +122,20 @@ static void free_video_specific(struct priv *p)
 {
     flush_output_surfaces(p);
 
-    for (int n = 0; n < MAX_OUTPUT_SURFACES; n++) {
-        talloc_free(p->swdec_surfaces[n]);
-        p->swdec_surfaces[n] = NULL;
-    }
-
-    flush_surfaces(&p->mpvaapi);
+    for (int n = 0; n < MAX_OUTPUT_SURFACES; n++)
+        mp_image_unrefp(&p->swdec_surfaces[n]);
 }
 
-static int alloc_swdec_surfaces(struct priv *p, int w, int h, int format)
+static bool alloc_swdec_surfaces(struct priv *p, int w, int h, int imgfmt)
 {
-    VAStatus status;
-
     free_video_specific(p);
-
-    VAImageFormat *image_format = VAImageFormat_from_imgfmt(p, format);
-    if (!image_format)
-        return -1;
     for (int i = 0; i < MAX_OUTPUT_SURFACES; i++) {
-        // WTF: no mapping from VAImageFormat -> VA_RT_FORMAT_
-        struct mp_image *img =
-            get_surface(&p->mpvaapi, VA_RT_FORMAT_YUV420, IMGFMT_VAAPI, w, h);
-        struct vaapi_surface *s = to_vaapi_surface(p, img);
-        if (!s)
-            return -1;
-
-        if (s->image.image_id != VA_INVALID_ID) {
-            vaDestroyImage(p->display, s->image.image_id);
-            s->image.image_id = VA_INVALID_ID;
-        }
-
-        status = vaDeriveImage(p->display, s->id, &s->image);
-        if (status == VA_STATUS_SUCCESS) {
-            /* vaDeriveImage() is supported, check format */
-            if (s->image.format.fourcc == image_format->fourcc &&
-                s->image.width == w && s->image.height == h)
-            {
-                s->is_bound = true;
-                MP_VERBOSE(p, "Using vaDeriveImage()\n");
-            } else {
-                vaDestroyImage(p->display, s->image.image_id);
-                s->image.image_id = VA_INVALID_ID;
-                status = VA_STATUS_ERROR_OPERATION_FAILED;
-            }
-        }
-        if (status != VA_STATUS_SUCCESS) {
-            status = vaCreateImage(p->display, image_format, w, h, &s->image);
-            if (!check_va_status(status, "vaCreateImage()")) {
-                talloc_free(img);
-                return -1;
-            }
-        }
-        p->swdec_surfaces[i] = img;
+        p->swdec_surfaces[i] =
+            va_surface_pool_get_wrapped(p->pool, p->va_image_formats, imgfmt, w, h);
+        if (!p->swdec_surfaces[i])
+            return false;
     }
-    return 0;
+    return true;
 }
 
 static void resize(struct priv *p)
@@ -392,7 +156,7 @@ static int reconfig(struct vo *vo, struct mp_image_params *params, int flags)
                             flags, "vaapi");
 
     if (!IMGFMT_IS_VAAPI(params->imgfmt)) {
-        if (alloc_swdec_surfaces(p, params->w, params->h, params->imgfmt) < 0)
+        if (!alloc_swdec_surfaces(p, params->w, params->h, params->imgfmt))
             return -1;
     }
 
@@ -401,42 +165,22 @@ static int reconfig(struct vo *vo, struct mp_image_params *params, int flags)
     return 0;
 }
 
-static int query_format(struct vo *vo, uint32_t format)
+static int query_format(struct vo *vo, uint32_t imgfmt)
 {
     struct priv *p = vo->priv;
-
-    if (IMGFMT_IS_VAAPI(format) || VAImageFormat_from_imgfmt(p, format))
+    if (IMGFMT_IS_VAAPI(imgfmt) || va_image_format_from_imgfmt(p->va_image_formats, imgfmt))
         return VFCAP_CSP_SUPPORTED | VFCAP_CSP_SUPPORTED_BY_HW;
 
     return 0;
 }
 
-static inline int get_field_flags(struct priv *p, int i, int flags)
-{
-    return (p->deint && (flags & MP_IMGFIELD_INTERLACED) ?
-            (((!!(flags & MP_IMGFIELD_TOP_FIRST)) ^ i) == 0 ?
-             VA_BOTTOM_FIELD : VA_TOP_FIELD) : VA_FRAME_PICTURE);
-}
-
-static inline int get_colorspace_flags(struct priv *p)
-{
-#if USE_VAAPI_COLORSPACE
-    switch (p->image_params.colorspace) {
-    case MP_CSP_BT_601:         return VA_SRC_BT601;
-    case MP_CSP_BT_709:         return VA_SRC_BT709;
-    case MP_CSP_SMPTE_240M:     return VA_SRC_SMPTE_240;
-    }
-#endif
-    return 0;
-}
-
 static bool render_to_screen(struct priv *p, struct mp_image *mpi)
 {
     bool res = true;
     VAStatus status;
 
-    struct vaapi_surface *surface = to_vaapi_surface(p, mpi);
-    if (!surface)
+    VASurfaceID surface = va_surface_id_in_mp_image(mpi);
+    if (surface == VA_INVALID_ID)
         return false;
 
     for (int n = 0; n < MAX_OSD_PARTS; n++) {
@@ -447,7 +191,7 @@ static bool render_to_screen(struct priv *p, struct mp_image *mpi)
             if (p->osd_screen)
                 flags |= VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD;
             status = vaAssociateSubpicture2(p->display,
-                                            sp->id, &surface->id, 1,
+                                            sp->id, &surface, 1,
                                             sp->src_x, sp->src_y,
                                             sp->src_w, sp->src_h,
                                             sp->dst_x, sp->dst_y,
@@ -457,33 +201,34 @@ static bool render_to_screen(struct priv *p, struct mp_image *mpi)
         }
     }
 
-    for (int i = 0; i <= !!(p->deint > 1); i++) {
-        unsigned int flags = (get_field_flags(p, i, mpi->fields) |
-                              get_colorspace_flags(p) |
-                              p->scaling);
-        status = vaPutSurface(p->display,
-                              surface->id,
-                              p->vo->x11->window,
-                              p->src_rect.x0,
-                              p->src_rect.y0,
-                              p->src_rect.x1 - p->src_rect.x0,
-                              p->src_rect.y1 - p->src_rect.y0,
-                              p->dst_rect.x0,
-                              p->dst_rect.y0,
-                              p->dst_rect.x1 - p->dst_rect.x0,
-                              p->dst_rect.y1 - p->dst_rect.y0,
-                              NULL, 0,
-                              flags);
-        if (!check_va_status(status, "vaPutSurface()"))
-            res = false;
+    int flags = va_get_colorspace_flag(p->image_params.colorspace) | p->scaling;
+    if (p->deint && (mpi->fields & MP_IMGFIELD_INTERLACED)) {
+        flags |= (mpi->fields & MP_IMGFIELD_TOP_FIRST) ?
+                                            VA_BOTTOM_FIELD : VA_TOP_FIELD;
+    } else {
+        flags |= VA_FRAME_PICTURE;
     }
+    status = vaPutSurface(p->display,
+                          surface,
+                          p->vo->x11->window,
+                          p->src_rect.x0,
+                          p->src_rect.y0,
+                          p->src_rect.x1 - p->src_rect.x0,
+                          p->src_rect.y1 - p->src_rect.y0,
+                          p->dst_rect.x0,
+                          p->dst_rect.y0,
+                          p->dst_rect.x1 - p->dst_rect.x0,
+                          p->dst_rect.y1 - p->dst_rect.y0,
+                          NULL, 0,
+                          flags);
+    check_va_status(status, "vaPutSurface()");
 
     for (int n = 0; n < MAX_OSD_PARTS; n++) {
         struct vaapi_osd_part *part = &p->osd_parts[n];
         if (part->active) {
             struct vaapi_subpic *sp = &part->subpic;
             status = vaDeassociateSubpicture(p->display, sp->id,
-                                             &surface->id, 1);
+                                             &surface, 1);
             check_va_status(status, "vaDeassociateSubpicture()");
         }
     }
@@ -500,144 +245,19 @@ static void flip_page(struct vo *vo)
     p->output_surface = (p->output_surface + 1) % MAX_OUTPUT_SURFACES;
 }
 
-static int map_image(struct priv *p, VAImage *va_image, int mpfmt,
-                     struct mp_image *dst)
-{
-    VAStatus status;
-
-    if (mpfmt != va_fourcc_to_imgfmt(va_image->format.fourcc))
-        return -1;
-
-    void *image_data = NULL;
-    status = vaMapBuffer(p->display, va_image->buf, &image_data);
-    if (!check_va_status(status, "vaMapBuffer()"))
-        return -1;
-
-    *dst = (struct mp_image) {0};
-    mp_image_setfmt(dst, mpfmt);
-    mp_image_set_size(dst, va_image->width, va_image->height);
-
-    for (int p = 0; p < va_image->num_planes; p++) {
-        dst->stride[p] = va_image->pitches[p];
-        dst->planes[p] = (uint8_t *)image_data + va_image->offsets[p];
-    }
-
-    if (va_image->format.fourcc == VA_FOURCC('Y','V','1','2')) {
-        FFSWAP(unsigned int, dst->stride[1], dst->stride[2]);
-        FFSWAP(uint8_t *, dst->planes[1], dst->planes[2]);
-    }
-
-    return 0;
-}
-
-static int unmap_image(struct priv *p, VAImage *va_image)
-{
-    VAStatus status;
-
-    status = vaUnmapBuffer(p->display, va_image->buf);
-    return check_va_status(status, "vaUnmapBuffer()") ? 0 : -1;
-}
-
-static int upload_surface(struct priv *p, struct vaapi_surface *va_surface,
-                          struct mp_image *mpi)
-{
-    VAStatus status;
-
-    if (va_surface->image.image_id == VA_INVALID_ID)
-        return -1;
-
-    struct mp_image img;
-    if (map_image(p, &va_surface->image, mpi->imgfmt, &img) < 0)
-        return -1;
-    mp_image_copy(&img, mpi);
-    unmap_image(p, &va_surface->image);
-
-    if (!va_surface->is_bound) {
-        status = vaPutImage2(p->display, va_surface->id,
-                             va_surface->image.image_id,
-                             0, 0, mpi->w, mpi->h,
-                             0, 0, mpi->w, mpi->h);
-        if (!check_va_status(status, "vaPutImage()"))
-            return -1;
-    }
-
-    return 0;
-}
-
-static int try_get_surface(struct priv *p, VAImageFormat *fmt,
-                           struct vaapi_surface *va_surface,
-                           VAImage *out_image)
-{
-    VAStatus status;
-
-    status = vaSyncSurface(p->display, va_surface->id);
-    if (!check_va_status(status, "vaSyncSurface()"))
-        return -2;
-
-    int w = va_surface->w;
-    int h = va_surface->h;
-
-    status = vaCreateImage(p->display, fmt, w, h, out_image);
-    if (!check_va_status(status, "vaCreateImage()"))
-        return -2;
-
-    status = vaGetImage(p->display, va_surface->id, 0, 0, w, h,
-                        out_image->image_id);
-    if (status != VA_STATUS_SUCCESS) {
-        vaDestroyImage(p->display, out_image->image_id);
-        return -1;
-    }
-
-    return 0;
-}
-
-static struct mp_image *download_surface(struct priv *p,
-                                         struct vaapi_surface *va_surface)
-{
-    // We have no clue which format will work, so try them all.
-    // This code is just for screenshots, so it's ok not to cache the right
-    // format (to prevent unnecessary work), and we don't attempt to use
-    // vaDeriveImage() for direct access either.
-    for (int i = 0; i < p->va_num_image_formats; i++) {
-        VAImageFormat *fmt = &p->va_image_formats[i];
-        int mpfmt = va_fourcc_to_imgfmt(fmt->fourcc);
-        if (!mpfmt)
-            continue;
-        VAImage image;
-        int r = try_get_surface(p, fmt, va_surface, &image);
-        if (r == -1)
-            continue;
-        if (r < 0)
-            return NULL;
-
-        struct mp_image *res = NULL;
-        struct mp_image tmp;
-        if (map_image(p, &image, mpfmt, &tmp) >= 0) {
-            res = mp_image_alloc(mpfmt, tmp.w, tmp.h);
-            mp_image_copy(res, &tmp);
-            unmap_image(p, &image);
-        }
-        vaDestroyImage(p->display, image.image_id);
-        return res;
-    }
-
-    MP_ERR(p, "failed to get surface data.\n");
-    return NULL;
-}
-
-static void draw_image(struct vo *vo, mp_image_t *mpi)
+static void draw_image(struct vo *vo, struct mp_image *mpi)
 {
     struct priv *p = vo->priv;
 
     if (!IMGFMT_IS_VAAPI(mpi->imgfmt)) {
-        struct mp_image *surface = p->swdec_surfaces[p->output_surface];
-        struct vaapi_surface *va_surface = to_vaapi_surface(p, surface);
-        if (!va_surface)
+        struct mp_image *wrapper = p->swdec_surfaces[p->output_surface];
+        struct va_surface *surface = va_surface_in_mp_image(wrapper);
+        if (!surface)
             return;
-        if (upload_surface(p, va_surface, mpi) < 0)
+        if (!va_surface_upload(surface, mpi))
             return;
-        mp_image_copy_attributes(surface, mpi);
-        mpi = surface;
+        mp_image_copy_attributes(wrapper, mpi);
+        mpi = wrapper;
     }
 
     mp_image_setrefp(&p->output_surfaces[p->output_surface], mpi);
@@ -645,11 +265,11 @@ static void draw_image(struct vo *vo, mp_image_t *mpi)
 
 static struct mp_image *get_screenshot(struct priv *p)
 {
-    struct vaapi_surface *va_surface =
-        to_vaapi_surface(p, p->output_surfaces[p->visible_surface]);
-    if (!va_surface)
+    struct va_surface *surface =
+        va_surface_in_mp_image(p->output_surfaces[p->visible_surface]);
+    if (!surface)
         return NULL;
-    struct mp_image *img = download_surface(p, va_surface);
+    struct mp_image *img = va_surface_download(surface, p->va_image_formats);
     if (!img)
         return NULL;
     struct mp_image_params params = p->image_params;
@@ -733,7 +353,7 @@ static void draw_osd_cb(void *pctx, struct sub_bitmaps *imgs)
 
         struct vaapi_osd_image *img = &part->image;
         struct mp_image vaimg;
-        if (map_image(p, &img->image, IMGFMT_BGRA, &vaimg) < 0)
+        if (!va_image_map(p->display, &img->image, &vaimg)) // false on error
             goto error;
 
         // Clear borders and regions uncovered by sub-bitmaps
@@ -754,7 +374,7 @@ static void draw_osd_cb(void *pctx, struct sub_bitmaps *imgs)
                        vaimg.stride[0], sub->stride);
         }
 
-        if (unmap_image(p, &img->image) < 0)
+        if (!va_image_unmap(p->display, &img->image)) // false on error
             goto error;
 
         part->subpic = (struct vaapi_subpic) {
@@ -864,14 +484,18 @@ static int control(struct vo *vo, uint32_t request, void *data)
 
     switch (request) {
     case VOCTRL_GET_DEINTERLACE:
+        if (!p->deint_type)
+            break;
         *(int*)data = !!p->deint;
         return VO_TRUE;
     case VOCTRL_SET_DEINTERLACE:
+        if (!p->deint_type)
+            break;
         p->deint = *(int*)data ? p->deint_type : 0;
         return VO_TRUE;
     case VOCTRL_GET_HWDEC_INFO: {
         struct mp_hwdec_info *arg = data;
-        arg->vaapi_ctx = &p->mpvaapi;
+        arg->vaapi_ctx = p->mpvaapi;
         return true;
     }
     case VOCTRL_SET_EQUALIZER: {
@@ -910,24 +534,14 @@ static void uninit(struct vo *vo)
     struct priv *p = vo->priv;
 
     free_video_specific(p);
-
-    for (int n = 0; n < p->num_video_surfaces; n++) {
-        struct vaapi_surface *surface = p->video_surfaces[n];
-        // Nothing is allowed to reference HW surfaces past VO lifetime.
-        assert(!surface->is_used);
-        talloc_free(surface);
-    }
-    p->num_video_surfaces = 0;
+    va_surface_pool_release(p->pool);
 
     for (int n = 0; n < MAX_OSD_PARTS; n++) {
         struct vaapi_osd_part *part = &p->osd_parts[n];
         free_subpicture(p, &part->image);
     }
 
-    if (p->display) {
-        vaTerminate(p->display);
-        p->display = NULL;
-    }
+    va_destroy(p->mpvaapi);
 
     vo_x11_uninit(vo);
 }
@@ -947,26 +561,14 @@ static int preinit(struct vo *vo)
     if (!p->display)
         return -1;
 
-    int major_version, minor_version;
-    status = vaInitialize(p->display, &major_version, &minor_version);
-    if (!check_va_status(status, "vaInitialize()"))
+    p->mpvaapi = va_initialize(p->display);
+    if (!p->mpvaapi) {
+        vaTerminate(p->display);
         return -1;
-    MP_VERBOSE(vo, "VA API version %d.%d\n", major_version, minor_version);
+    }
 
-    p->mpvaapi.display = p->display;
-    p->mpvaapi.priv = vo;
-    p->mpvaapi.flush = flush_surfaces;
-    p->mpvaapi.get_surface = get_surface;
-
-    int max_image_formats = vaMaxNumImageFormats(p->display);
-    p->va_image_formats = talloc_array(vo, VAImageFormat, max_image_formats);
-    status = vaQueryImageFormats(p->display, p->va_image_formats,
-                                 &p->va_num_image_formats);
-    if (!check_va_status(status, "vaQueryImageFormats()"))
-        return -1;
-    MP_VERBOSE(vo, "%d image formats available:\n", p->va_num_image_formats);
-    for (int i = 0; i < p->va_num_image_formats; i++)
-        MP_VERBOSE(vo, "  %s\n", STR_FOURCC(p->va_image_formats[i].fourcc));
+    p->pool = va_surface_pool_alloc(p->display, VA_RT_FORMAT_YUV420);
+    p->va_image_formats = p->mpvaapi->image_formats;
 
     int max_subpic_formats = vaMaxNumSubpictureFormats(p->display);
     p->va_subpic_formats = talloc_array(vo, VAImageFormat, max_subpic_formats);
@@ -982,7 +584,7 @@ static int preinit(struct vo *vo)
 
     for (int i = 0; i < p->va_num_subpic_formats; i++) {
         MP_VERBOSE(vo, "  %s, flags 0x%x\n",
-                   STR_FOURCC(p->va_subpic_formats[i].fourcc),
+                   VA_STR_FOURCC(p->va_subpic_formats[i].fourcc),
                    p->va_subpic_flags[i]);
         if (p->va_subpic_formats[i].fourcc == OSD_VA_FORMAT) {
             p->osd_format = p->va_subpic_formats[i];
@@ -1034,8 +636,10 @@ const struct vo_driver video_out_vaapi = {
     .priv_size = sizeof(struct priv),
     .priv_defaults = &(const struct priv) {
         .scaling = VA_FILTER_SCALING_DEFAULT,
-        .deint_type = 2,
         .deint = 0,
+#if !CONFIG_VAAPI_VPP
+        .deint_type = 2,
+#endif
     },
     .options = (const struct m_option[]) {
 #if USE_VAAPI_SCALING
diff --git a/video/vaapi.c b/video/vaapi.c
new file mode 100644
index 0000000000..3612ec2a9a
--- /dev/null
+++ b/video/vaapi.c
@@ -0,0 +1,520 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <libavutil/avutil.h>
+
+#include "vaapi.h"
+#include "mpvcore/mp_msg.h"
+#include "mp_image.h"
+#include "img_format.h"
+
+#define VA_VERBOSE(...) mp_msg(MSGT_VO, MSGL_V, "[vaapi] "  __VA_ARGS__)
+#define VA_ERROR(...) mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] "  __VA_ARGS__)
+
+bool check_va_status(VAStatus status, const char *msg)
+{
+    if (status != VA_STATUS_SUCCESS) {
+        mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] %s: %s\n", msg, vaErrorStr(status));
+        return false;
+    }
+    return true;
+}
+
+int va_get_colorspace_flag(enum mp_csp csp)
+{
+#if USE_VAAPI_COLORSPACE
+    switch (csp) {
+    case MP_CSP_BT_601:         return VA_SRC_BT601;
+    case MP_CSP_BT_709:         return VA_SRC_BT709;
+    case MP_CSP_SMPTE_240M:     return VA_SRC_SMPTE_240;
+    }
+#endif
+    return 0;
+}
+
+struct fmtentry {
+    uint32_t va;
+    enum mp_imgfmt mp;
+};
+
+static const struct fmtentry va_to_imgfmt[] = {
+    {VA_FOURCC_YV12, IMGFMT_420P},
+    {VA_FOURCC_I420, IMGFMT_420P},
+    {VA_FOURCC_IYUV, IMGFMT_420P},
+    {VA_FOURCC_NV12, IMGFMT_NV12},
+    {VA_FOURCC_UYVY, IMGFMT_UYVY},
+    {VA_FOURCC_YUY2, IMGFMT_YUYV},
+    // Note: not sure about endian issues (the mp formats are byte-addressed)
+    {VA_FOURCC_RGBA, IMGFMT_RGBA},
+    {VA_FOURCC_RGBX, IMGFMT_RGBA},
+    {VA_FOURCC_BGRA, IMGFMT_BGRA},
+    {VA_FOURCC_BGRX, IMGFMT_BGRA},
+    {0             , IMGFMT_NONE}
+};
+
+enum mp_imgfmt va_fourcc_to_imgfmt(uint32_t fourcc)
+{
+    for (const struct fmtentry *entry = va_to_imgfmt; entry->va; ++entry) {
+        if (entry->va == fourcc)
+            return entry->mp;
+    }
+    return IMGFMT_NONE;
+}
+
+uint32_t va_fourcc_from_imgfmt(int imgfmt)
+{
+    for (const struct fmtentry *entry = va_to_imgfmt; entry->va; ++entry) {
+        if (entry->mp == imgfmt)
+            return entry->va;
+    }
+    return 0;
+}
+
+struct va_image_formats {
+    VAImageFormat *entries;
+    int num;
+};
+
+static void va_get_formats(struct mp_vaapi_ctx *ctx)
+{
+    int num = vaMaxNumImageFormats(ctx->display);
+    VAImageFormat entries[num];
+    VAStatus status = vaQueryImageFormats(ctx->display, entries, &num);
+    if (!check_va_status(status, "vaQueryImageFormats()"))
+        return;
+    struct va_image_formats *formats = talloc_ptrtype(ctx, formats);
+    formats->entries = talloc_array(formats, VAImageFormat, num);
+    formats->num = num;
+    VA_VERBOSE("%d image formats available:\n", num);
+    for (int i = 0; i < num; i++) {
+        formats->entries[i] = entries[i];
+        VA_VERBOSE("  %s\n", VA_STR_FOURCC(entries[i].fourcc));
+    }
+    ctx->image_formats = formats;
+}
+
+struct mp_vaapi_ctx *va_initialize(VADisplay *display)
+{
+    // Initializes libva on display and returns a new context holding it.
+    // Returns NULL on failure; display is then NOT terminated on any path,
+    // because the caller does that itself and a second vaTerminate() here
+    // would act on an already destroyed display.
+    int major_version, minor_version;
+    int status = vaInitialize(display, &major_version, &minor_version);
+    if (!check_va_status(status, "vaInitialize()"))
+        return NULL;
+
+    VA_VERBOSE("VA API version %d.%d\n", major_version, minor_version);
+
+    struct mp_vaapi_ctx *res = talloc_ptrtype(NULL, res);
+    *res = (struct mp_vaapi_ctx) {
+        .display = display,
+    };
+
+    va_get_formats(res);
+    if (!res->image_formats) {
+        talloc_free(res);
+        return NULL;
+    }
+    return res;
+}
+
+// Undo va_initialize, and close the VADisplay.
+void va_destroy(struct mp_vaapi_ctx *ctx)
+{
+    if (ctx) {
+        if (ctx->display)
+            vaTerminate(ctx->display);
+        talloc_free(ctx);
+    }
+}
+
+VAImageFormat *va_image_format_from_imgfmt(const struct va_image_formats *formats,
+                                           int imgfmt)
+{
+    const int fourcc = va_fourcc_from_imgfmt(imgfmt);
+    if (!formats || !formats->num || !fourcc)
+        return NULL;
+    for (int i = 0; i < formats->num; i++) {
+        if (formats->entries[i].fourcc == fourcc)
+            return &formats->entries[i];
+    }
+    return NULL;
+}
+
+static void va_surface_destroy(struct va_surface *surface);
+
+struct va_surface_pool {
+    VADisplay display;
+    int rt_format;
+    int num_surfaces, lru_counter;
+    struct va_surface **surfaces;
+};
+
+typedef struct va_surface_priv {
+    VADisplay display;   // display used to create/destroy the surface and image
+    VAImage image;       // used for software decoding case
+    bool is_derived;     // is image derived by vaDeriveImage()?
+    bool is_used;        // referenced
+    bool is_dead;        // used, but deallocate VA objects as soon as possible
+    int  order;          // for LRU allocation
+} va_surface_priv_t;
+
+struct va_surface_pool *va_surface_pool_alloc(VADisplay display, int rt_format)
+{
+    struct va_surface_pool *pool = talloc_ptrtype(NULL, pool);
+    *pool = (struct va_surface_pool) {
+        .display = display,
+        .rt_format = rt_format
+    };
+    return pool;
+}
+
+
+void va_surface_pool_release(struct va_surface_pool *pool)
+{
+    if (!pool)
+        return;
+    va_surface_pool_clear(pool);
+    talloc_free(pool);
+}
+
+void va_surface_pool_releasep(struct va_surface_pool **pool) {
+    if (!pool)
+        return;
+    va_surface_pool_release(*pool);
+    *pool = NULL;
+}
+
+void va_surface_pool_clear(struct va_surface_pool *pool)
+{
+    for (int i = 0; i < pool->num_surfaces; ++i) {
+        if (pool->surfaces[i]->p->is_used) // still referenced: free on release
+            pool->surfaces[i]->p->is_dead = true;
+        else
+            va_surface_destroy(pool->surfaces[i]);
+    }
+    talloc_free(pool->surfaces);
+    pool->surfaces = NULL; // va_surface_alloc() appends to this array again
+    pool->num_surfaces = 0;
+}
+
+void va_surface_destroy(struct va_surface *surface)
+{
+    if (!surface)
+        return;
+    if (surface->id != VA_INVALID_ID) {
+        va_surface_priv_t *p = surface->p;
+        assert(!p->is_used);
+        if (p->image.image_id != VA_INVALID_ID)
+            vaDestroyImage(p->display, p->image.image_id);
+        vaDestroySurfaces(p->display, &surface->id, 1);
+    }
+    talloc_free(surface);
+}
+
+void va_surface_release(struct va_surface *surface)
+{
+    if (!surface)
+        return;
+    surface->p->is_used = false; // va_surface_pool_get() may pick it again
+    if (surface->p->is_dead)     // flagged by va_surface_pool_clear() while used
+        va_surface_destroy(surface);
+}
+
+void va_surface_releasep(struct va_surface **surface)
+{
+    if (!surface)
+        return;
+    va_surface_release(*surface);
+    *surface = NULL;
+}
+
+static struct va_surface *va_surface_alloc(struct va_surface_pool *pool,
+                                           int w, int h)
+{
+    VASurfaceID id = VA_INVALID_ID;
+    VAStatus status;
+    status = vaCreateSurfaces(pool->display, w, h, pool->rt_format, 1, &id);
+    if (!check_va_status(status, "vaCreateSurfaces()"))
+        return NULL;
+
+    struct va_surface *surface = talloc_ptrtype(NULL, surface);
+    if (!surface)
+        return NULL;
+
+    MP_TARRAY_APPEND(NULL, pool->surfaces, pool->num_surfaces, surface);
+    surface->id = id;
+    surface->w = w;
+    surface->h = h;
+    surface->rt_format = pool->rt_format;
+    surface->p = talloc_zero(surface, va_surface_priv_t);
+    surface->p->display = pool->display;
+    surface->p->image.image_id = surface->p->image.buf = VA_INVALID_ID;
+    return surface;
+}
+
+struct mp_image *va_surface_pool_get_wrapped(struct va_surface_pool *pool,
+                                             const struct va_image_formats *formats,
+                                             int imgfmt, int w, int h)
+{
+    return va_surface_wrap(va_surface_pool_get_by_imgfmt(pool, formats, imgfmt,
+                                                         w, h));
+}
+
+int va_surface_pool_rt_format(const struct va_surface_pool *pool)
+{
+    return pool->rt_format;
+}
+
+bool va_surface_pool_reserve(struct va_surface_pool *pool, int count,
+                             int w, int h)
+{
+    for (int i=0; i<pool->num_surfaces && count > 0; ++i) {
+        const struct va_surface *s = pool->surfaces[i];
+        if (s->w == w && s->h == h && !s->p->is_used)
+            --count;
+    }
+    while (count > 0) {
+        if (!va_surface_alloc(pool, w, h))
+            break;
+        --count;
+    }
+    return !count;
+}
+
+struct va_surface *va_surface_pool_get(struct va_surface_pool *pool,
+                                       int w, int h)
+{
+    struct va_surface *best = NULL; // unused w x h surface with lowest order
+    for (int i=0; i<pool->num_surfaces; ++i) {
+        struct va_surface *s = pool->surfaces[i];
+        if (!s->p->is_used && s->w == w && s->h == h) {
+            if (!best || best->p->order > s->p->order) // handed out longest ago
+                best = s;
+        }
+    }
+    if (!best) // nothing reusable: create a new surface (NULL if that fails)
+        best = va_surface_alloc(pool, w, h);
+    if (best) {
+        best->p->is_used = true;
+        best->p->order = ++pool->lru_counter; // newest stamp, reused last
+    }
+    return best;
+}
+
+static void va_surface_image_destroy(struct va_surface *surface)
+{
+    if (!surface || surface->p->image.image_id == VA_INVALID_ID)
+        return;
+    va_surface_priv_t *p = surface->p;
+    vaDestroyImage(p->display, p->image.image_id);
+    p->image.image_id = VA_INVALID_ID;
+    p->is_derived = false;
+}
+
+static VAImage *va_surface_image_alloc(struct va_surface *surface,
+                                       VAImageFormat *format)
+{
+    if (!format || !surface)
+        return NULL;
+    va_surface_priv_t *p = surface->p;
+    if (p->image.image_id != VA_INVALID_ID &&
+        p->image.format.fourcc == format->fourcc)
+        return &p->image;
+    va_surface_image_destroy(surface);
+
+    VAStatus status = vaDeriveImage(p->display, surface->id, &p->image);
+    if (check_va_status(status, "vaDeriveImage()")) {
+        /* vaDeriveImage() is supported, check format */
+        if (p->image.format.fourcc == format->fourcc &&
+                p->image.width == surface->w && p->image.height == surface->h) {
+            p->is_derived = true;
+            VA_VERBOSE("Using vaDeriveImage()\n");
+        } else {
+            vaDestroyImage(p->display, p->image.image_id);
+            p->image.image_id = VA_INVALID_ID;
+            status = VA_STATUS_ERROR_OPERATION_FAILED;
+        }
+    }
+    if (status != VA_STATUS_SUCCESS) {
+        status = vaCreateImage(p->display, format, surface->w, surface->h,
+                               &p->image);
+        if (!check_va_status(status, "vaCreateImage()")) {
+            p->image.image_id = VA_INVALID_ID;
+            return NULL;
+        }
+    }
+    return &surface->p->image;
+}
+
+
+
+struct va_surface *va_surface_pool_get_by_imgfmt(struct va_surface_pool *pool,
+                                                 const struct va_image_formats *formats,
+                                                 int imgfmt, int w, int h)
+{
+    if (imgfmt == IMGFMT_VAAPI)
+        return va_surface_pool_get(pool, w, h);
+    VAImageFormat *format = va_image_format_from_imgfmt(formats, imgfmt);
+    if (!format)
+        return NULL;
+    // WTF: no mapping from VAImageFormat -> VA_RT_FORMAT_
+    struct va_surface *surface = va_surface_pool_get(pool, w, h);
+    if (!surface)
+        return NULL;
+    if (va_surface_image_alloc(surface, format))
+        return surface;
+    va_surface_release(surface);
+    return NULL;
+}
+
+static void free_va_surface(void *arg)
+{
+    va_surface_release((struct va_surface*)arg);
+}
+
+struct mp_image *va_surface_wrap(struct va_surface *surface)
+{
+    if (!surface)
+        return NULL;
+
+    struct mp_image img = {0};
+    mp_image_setfmt(&img, IMGFMT_VAAPI);
+    mp_image_set_size(&img, surface->w, surface->h);
+    img.planes[0] = (uint8_t*)surface;
+    img.planes[3] = (uint8_t*)(uintptr_t)surface->id;
+    return mp_image_new_custom_ref(&img, surface, free_va_surface);
+}
+
+VASurfaceID va_surface_id_in_mp_image(const struct mp_image *mpi)
+{
+    return mpi && IMGFMT_IS_VAAPI(mpi->imgfmt) ?
+        (VASurfaceID)(uintptr_t)mpi->planes[3] : VA_INVALID_ID;
+}
+
+struct va_surface *va_surface_in_mp_image(struct mp_image *mpi)
+{
+    return mpi && IMGFMT_IS_VAAPI(mpi->imgfmt) ?
+        (struct va_surface*)mpi->planes[0] : NULL;
+}
+
+VASurfaceID va_surface_id(const struct va_surface *surface)
+{
+    return surface->id;
+}
+
+bool va_image_map(VADisplay display, VAImage *image, struct mp_image *mpi)
+{
+    int imgfmt = va_fourcc_to_imgfmt(image->format.fourcc);
+    if (imgfmt == IMGFMT_NONE)
+        return false;
+    void *data = NULL;
+    const VAStatus status = vaMapBuffer(display, image->buf, &data);
+    if (!check_va_status(status, "vaMapBuffer()"))
+        return false;
+
+    *mpi = (struct mp_image) {0};
+    mp_image_setfmt(mpi, imgfmt);
+    mp_image_set_size(mpi, image->width, image->height);
+
+    for (int p = 0; p < image->num_planes; p++) {
+        mpi->stride[p] = image->pitches[p];
+        mpi->planes[p] = (uint8_t *)data + image->offsets[p];
+    }
+
+    if (image->format.fourcc == VA_FOURCC_YV12) {
+        FFSWAP(unsigned int, mpi->stride[1], mpi->stride[2]);
+        FFSWAP(uint8_t *, mpi->planes[1], mpi->planes[2]);
+    }
+
+    return true;
+}
+
+bool va_image_unmap(VADisplay display, VAImage *image)
+{
+    const VAStatus status = vaUnmapBuffer(display, image->buf);
+    return check_va_status(status, "vaUnmapBuffer()");
+}
+
+bool va_surface_upload(struct va_surface *surface, const struct mp_image *mpi)
+{
+    va_surface_priv_t *p = surface->p;
+    if (p->image.image_id == VA_INVALID_ID)
+        return false;
+
+    struct mp_image img;
+    if (!va_image_map(p->display, &p->image, &img))
+        return false;
+    mp_image_copy(&img, (struct mp_image*)mpi);
+    va_image_unmap(p->display, &p->image);
+
+    if (!p->is_derived) {
+        VAStatus status = vaPutImage2(p->display, surface->id,
+                                      p->image.image_id,
+                                      0, 0, mpi->w, mpi->h,
+                                      0, 0, mpi->w, mpi->h);
+        if (!check_va_status(status, "vaPutImage()"))
+            return false;
+    }
+
+    return true;
+}
+
+struct mp_image *va_surface_download(const struct va_surface *surface,
+                                     const struct va_image_formats *formats)
+{
+    VAStatus status = vaSyncSurface(surface->p->display, surface->id);
+    if (!check_va_status(status, "vaSyncSurface()"))
+        return NULL;
+
+    // We have no clue which format will work, so try them all.
+    // This code is just for screenshots, so it's ok not to cache the right
+    // format (to prevent unnecessary work), and we don't attempt to use
+    // vaDeriveImage() for direct access either.
+    for (int i = 0; i < formats->num; i++) {
+        VAImageFormat *format = &formats->entries[i];
+        const enum mp_imgfmt imgfmt = va_fourcc_to_imgfmt(format->fourcc);
+        if (imgfmt == IMGFMT_NONE)
+            continue;
+        VAImage image;
+        status = vaCreateImage(surface->p->display, format,
+                               surface->w, surface->h, &image);
+        if (!check_va_status(status, "vaCreateImage()"))
+            continue;
+        status = vaGetImage(surface->p->display, surface->id, 0, 0,
+                            surface->w, surface->h, image.image_id);
+        if (status != VA_STATUS_SUCCESS) {
+            vaDestroyImage(surface->p->display, image.image_id);
+            continue;
+        }
+        struct mp_image *dst = NULL;
+        struct mp_image tmp;
+        if (va_image_map(surface->p->display, &image, &tmp)) {
+            assert(tmp.imgfmt == imgfmt);
+            dst = mp_image_alloc(imgfmt, tmp.w, tmp.h);
+            mp_image_copy(dst, &tmp);
+            va_image_unmap(surface->p->display, &image);
+        }
+        vaDestroyImage(surface->p->display, image.image_id);
+        return dst;
+    }
+    VA_ERROR("failed to get surface data.\n");
+    return NULL;
+}
+
diff --git a/video/vaapi.h b/video/vaapi.h
index af6d7e70c4..fa87658391 100644
--- a/video/vaapi.h
+++ b/video/vaapi.h
@@ -3,8 +3,8 @@
 
 #include <stdbool.h>
 #include <inttypes.h>
-
 #include <va/va.h>
+#include <va/va_x11.h>
 
 /* Compatibility glue with VA-API >= 0.31 */
 #if defined VA_CHECK_VERSION
@@ -51,23 +51,72 @@
 # define USE_VAAPI_SCALING 0
 #endif
 
-#include "mpvcore/mp_msg.h"
+#ifndef VA_FOURCC_YV12 // fallbacks: only defined if the libva headers lack them
+#define VA_FOURCC_YV12 0x32315659 // "YV12", first character in the low byte
+#endif
+#ifndef VA_FOURCC_IYUV
+#define VA_FOURCC_IYUV 0x56555949 // "IYUV"
+#endif
+#ifndef VA_FOURCC_I420
+#define VA_FOURCC_I420 VA_FOURCC('I', '4', '2', '0')
+#endif
+#ifndef VA_FOURCC_RGBX
+#define VA_FOURCC_RGBX 0x58424752 // "RGBX"
+#endif
+#ifndef VA_FOURCC_BGRX
+#define VA_FOURCC_BGRX 0x58524742 // "BGRX"
+#endif
 
-static inline bool check_va_status(VAStatus status, const char *msg)
-{
-    if (status != VA_STATUS_SUCCESS) {
-        mp_msg(MSGT_VO, MSGL_ERR, "[vaapi] %s: %s\n", msg, vaErrorStr(status));
-        return false;
-    }
-    return true;
-}
+#define VA_STR_FOURCC(fcc) /* fourcc as a NUL-terminated char[5] literal */ \
+    (const char[]){(fcc), (fcc) >> 8u, (fcc) >> 16u, (fcc) >> 24u, 0} // low byte first; fcc is evaluated 4 times; NOTE(review): bytes are not masked, relies on narrowing to char
+
+#include "mp_image.h"
 
 struct mp_vaapi_ctx {
     VADisplay display;
-    struct mp_image *(*get_surface)(struct mp_vaapi_ctx *ctx, int va_format,
-                                    int mp_format, int w, int h);
-    void (*flush)(struct mp_vaapi_ctx *ctx);
-    void *priv; // for VO
+    struct va_image_formats *image_formats; // NOTE(review): presumably filled in by va_initialize() - confirm
 };
 
+struct va_surface_pool;
+struct va_image_formats;
+
+struct va_surface {
+    VASurfaceID id;      // VA_INVALID_ID if unallocated
+    int w, h, rt_format; // parameters of allocated image (0/0/-1 unallocated)
+
+    struct va_surface_priv *p; // private state; p->display is the VADisplay used for VA calls on this surface
+};
+
+bool                     check_va_status(VAStatus status, const char *msg); // callers treat false as failure; msg names the VA call
+
+int                      va_get_colorspace_flag(enum mp_csp csp);
+
+struct mp_vaapi_ctx     *va_initialize(VADisplay *display);
+void                     va_destroy(struct mp_vaapi_ctx *ctx);
+
+enum mp_imgfmt           va_fourcc_to_imgfmt(uint32_t fourcc); // callers check the result against IMGFMT_NONE
+uint32_t                 va_fourcc_from_imgfmt(int imgfmt);
+VAImageFormat *          va_image_format_from_imgfmt(const struct va_image_formats *formats, int imgfmt);
+bool                     va_image_map(VADisplay display, VAImage *image, struct mp_image *mpi);
+bool                     va_image_unmap(VADisplay display, VAImage *image);
+
+struct va_surface_pool * va_surface_pool_alloc(VADisplay display, int rt_format);
+void                     va_surface_pool_release(struct va_surface_pool *pool);
+void                     va_surface_pool_releasep(struct va_surface_pool **pool);
+void                     va_surface_pool_clear(struct va_surface_pool *pool);
+bool                     va_surface_pool_reserve(struct va_surface_pool *pool, int count, int w, int h);
+int                      va_surface_pool_rt_format(const struct va_surface_pool *pool);
+struct va_surface *      va_surface_pool_get(struct va_surface_pool *pool, int w, int h);
+struct va_surface *      va_surface_pool_get_by_imgfmt(struct va_surface_pool *pool, const struct va_image_formats *formats, int imgfmt, int w, int h);
+struct mp_image *        va_surface_pool_get_wrapped(struct va_surface_pool *pool, const struct va_image_formats *formats, int imgfmt, int w, int h);
+
+void                     va_surface_release(struct va_surface *surface);
+void                     va_surface_releasep(struct va_surface **surface);
+struct va_surface *      va_surface_in_mp_image(struct mp_image *mpi);
+struct mp_image *        va_surface_wrap(struct va_surface *surface); // takes ownership
+VASurfaceID              va_surface_id(const struct va_surface *surface);
+VASurfaceID              va_surface_id_in_mp_image(const struct mp_image *mpi);
+bool                     va_surface_upload(struct va_surface *surface, const struct mp_image *mpi);
+struct mp_image *        va_surface_download(const struct va_surface *surface, const struct va_image_formats *formats); // newly allocated copy, or NULL on failure
+
 #endif