diff --git a/configure b/configure
index e3831e6dee..c6c6fac18c 100755
--- a/configure
+++ b/configure
@@ -3084,6 +3084,8 @@ h264_d3d11va_hwaccel_deps="d3d11va"
 h264_d3d11va_hwaccel_select="h264_decoder"
 h264_d3d11va2_hwaccel_deps="d3d11va"
 h264_d3d11va2_hwaccel_select="h264_decoder"
+h264_d3d12va_hwaccel_deps="d3d12va"
+h264_d3d12va_hwaccel_select="h264_decoder"
 h264_dxva2_hwaccel_deps="dxva2"
 h264_dxva2_hwaccel_select="h264_decoder"
 h264_nvdec_hwaccel_deps="nvdec"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fd9883d2ca..ead3a4480e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -986,6 +986,7 @@ OBJS-$(CONFIG_ADPCM_ZORK_DECODER)         += adpcm.o adpcm_data.o
 
 # hardware accelerators
 OBJS-$(CONFIG_D3D11VA)                    += dxva2.o
+OBJS-$(CONFIG_D3D12VA)                    += dxva2.o d3d12va_decode.o
 OBJS-$(CONFIG_DXVA2)                      += dxva2.o
 OBJS-$(CONFIG_NVDEC)                      += nvdec.o
 OBJS-$(CONFIG_VAAPI)                      += vaapi_decode.o
@@ -1003,6 +1004,7 @@ OBJS-$(CONFIG_H263_VAAPI_HWACCEL)         += vaapi_mpeg4.o
 OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
 OBJS-$(CONFIG_H264_D3D11VA_HWACCEL)       += dxva2_h264.o
 OBJS-$(CONFIG_H264_DXVA2_HWACCEL)         += dxva2_h264.o
+OBJS-$(CONFIG_H264_D3D12VA_HWACCEL)       += dxva2_h264.o d3d12va_h264.o
 OBJS-$(CONFIG_H264_NVDEC_HWACCEL)         += nvdec_h264.o
 OBJS-$(CONFIG_H264_QSV_HWACCEL)           += qsvdec.o
 OBJS-$(CONFIG_H264_VAAPI_HWACCEL)         += vaapi_h264.o
@@ -1296,6 +1298,7 @@ SKIPHEADERS                            += %_tablegen.h                  \
 
 SKIPHEADERS-$(CONFIG_AMF)              += amfenc.h
 SKIPHEADERS-$(CONFIG_D3D11VA)          += d3d11va.h dxva2_internal.h
+SKIPHEADERS-$(CONFIG_D3D12VA)          += d3d12va_decode.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += dxva2.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_JNI)              += ffjni.h
 SKIPHEADERS-$(CONFIG_LCMS2)            += fflcms2.h
diff --git a/libavcodec/d3d11va.h b/libavcodec/d3d11va.h
index 6816b6c1e6..27f40e5519 100644
--- a/libavcodec/d3d11va.h
+++ b/libavcodec/d3d11va.h
@@ -45,9 +45,6 @@
  * @{
  */
 
-#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for Direct3D11 and old UVD/UVD+ ATI video cards
-#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work around for Direct3D11 and old Intel GPUs with ClearVideo interface
-
 /**
  * This structure is used to provides the necessary configurations and data
  * to the Direct3D11 FFmpeg HWAccel implementation.
diff --git a/libavcodec/d3d12va_decode.c b/libavcodec/d3d12va_decode.c
new file mode 100644
index 0000000000..03e565066c
--- /dev/null
+++ b/libavcodec/d3d12va_decode.c
@@ -0,0 +1,538 @@
+/*
+ * Direct3D 12 HW acceleration video decoder
+ *
+ * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <initguid.h>
+
+#include "libavutil/common.h"
+#include "libavutil/log.h"
+#include "libavutil/time.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/hwcontext_d3d12va_internal.h"
+#include "libavutil/hwcontext_d3d12va.h"
+#include "avcodec.h"
+#include "decode.h"
+#include "d3d12va_decode.h"
+
+typedef struct HelperObjects {
+    ID3D12CommandAllocator *command_allocator;
+    ID3D12Resource *buffer;
+    uint64_t fence_value;
+} HelperObjects;
+
+int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx)
+{
+    AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
+    return av_image_get_buffer_size(frames_ctx->sw_format, avctx->coded_width, avctx->coded_height, 1);
+}
+
+unsigned ff_d3d12va_get_surface_index(const AVCodecContext *avctx,
+                                      D3D12VADecodeContext *ctx, const AVFrame *frame,
+                                      int curr)
+{
+    AVD3D12VAFrame *f;
+    ID3D12Resource *res;
+    unsigned i;
+
+    f = (AVD3D12VAFrame *)frame->data[0];
+    if (!f)
+        goto fail;
+
+    res = f->texture;
+    if (!res)
+        goto fail;
+
+    if (!curr) {
+        for (i = 0; i < ctx->max_num_ref; i++) {
+            if (ctx->ref_resources[i] && res == ctx->ref_resources[i]) {
+                ctx->used_mask |= 1 << i;
+                return i;
+            }
+        }
+    } else {
+        for (i = 0; i < ctx->max_num_ref; i++) {
+            if (!((ctx->used_mask >> i) & 0x1)) {
+                ctx->ref_resources[i] = res;
+                return i;
+            }
+        }
+    }
+
+fail:
+    assert(0);
+    return 0;
+}
+
+static int d3d12va_get_valid_helper_objects(AVCodecContext *avctx, ID3D12CommandAllocator **ppAllocator,
+                                            ID3D12Resource **ppBuffer)
+{
+    HRESULT hr;
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    HelperObjects obj = { 0 };
+    D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
+
+    D3D12_RESOURCE_DESC desc = {
+        .Dimension        = D3D12_RESOURCE_DIMENSION_BUFFER,
+        .Alignment        = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+        .Width            = ctx->bitstream_size,
+        .Height           = 1,
+        .DepthOrArraySize = 1,
+        .MipLevels        = 1,
+        .Format           = DXGI_FORMAT_UNKNOWN,
+        .SampleDesc       = { .Count = 1, .Quality = 0 },
+        .Layout           = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+        .Flags            = D3D12_RESOURCE_FLAG_NONE,
+    };
+
+    if (av_fifo_peek(ctx->objects_queue, &obj, 1, 0) >= 0) {
+        uint64_t completion = ID3D12Fence_GetCompletedValue(ctx->sync_ctx.fence);
+        if (completion >= obj.fence_value) { /* fence passed: the oldest pair is reusable */
+            *ppAllocator = obj.command_allocator;
+            *ppBuffer    = obj.buffer;
+            av_fifo_read(ctx->objects_queue, &obj, 1);
+            return 0;
+        }
+    }
+
+    hr = ID3D12Device_CreateCommandAllocator(ctx->device_ctx->device, D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
+                                             &IID_ID3D12CommandAllocator, (void **)ppAllocator);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create a new command allocator!\n");
+        return AVERROR(EINVAL);
+    }
+
+    hr = ID3D12Device_CreateCommittedResource(ctx->device_ctx->device, &heap_props, D3D12_HEAP_FLAG_NONE,
+                                              &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
+                                              &IID_ID3D12Resource, (void **)ppBuffer);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create a new d3d12 buffer!\n");
+        D3D12_OBJECT_RELEASE(*ppAllocator); /* never hand back half of a pair */
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static int d3d12va_discard_helper_objects(AVCodecContext *avctx, ID3D12CommandAllocator *pAllocator,
+                                          ID3D12Resource *pBuffer, uint64_t fence_value)
+{
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+
+    HelperObjects obj = {
+        .command_allocator = pAllocator,
+        .buffer            = pBuffer,
+        .fence_value       = fence_value,
+    };
+
+    if (av_fifo_write(ctx->objects_queue, &obj, 1) < 0) {
+        D3D12_OBJECT_RELEASE(pAllocator);
+        D3D12_OBJECT_RELEASE(pBuffer);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+static int d3d12va_fence_completion(AVD3D12VASyncContext *psync_ctx)
+{
+    uint64_t completion = ID3D12Fence_GetCompletedValue(psync_ctx->fence);
+    if (completion < psync_ctx->fence_value) {
+        if (FAILED(ID3D12Fence_SetEventOnCompletion(psync_ctx->fence, psync_ctx->fence_value, psync_ctx->event)))
+            return AVERROR(EINVAL);
+
+        WaitForSingleObjectEx(psync_ctx->event, INFINITE, FALSE);
+    }
+
+    return 0;
+}
+
+static void bufref_free_interface(void *opaque, uint8_t *data)
+{
+    D3D12_OBJECT_RELEASE(opaque);
+}
+
+static AVBufferRef *bufref_wrap_interface(IUnknown *iface)
+{
+    return av_buffer_create((uint8_t*)iface, 1, bufref_free_interface, iface, 0);
+}
+
+static int d3d12va_sync_with_gpu(AVCodecContext *avctx)
+{
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+
+    DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx.fence, ++ctx->sync_ctx.fence_value));
+    return d3d12va_fence_completion(&ctx->sync_ctx);
+
+fail:
+    return AVERROR(EINVAL);
+}
+
+static int d3d12va_create_decoder_heap(AVCodecContext *avctx)
+{
+    D3D12VADecodeContext   *ctx          = D3D12VA_DECODE_CONTEXT(avctx);
+    AVHWFramesContext      *frames_ctx   = D3D12VA_FRAMES_CONTEXT(avctx);
+    AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx;
+    AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
+
+    D3D12_VIDEO_DECODER_HEAP_DESC desc = {
+        .NodeMask      = 0,
+        .Configuration = ctx->cfg,
+        .DecodeWidth   = frames_ctx->width,
+        .DecodeHeight  = frames_ctx->height,
+        .Format        = frames_hwctx->format,
+        .FrameRate     = { avctx->framerate.num, avctx->framerate.den },
+        .BitRate       = avctx->bit_rate,
+        .MaxDecodePictureBufferCount = ctx->max_num_ref,
+    };
+
+    DX_CHECK(ID3D12VideoDevice_CreateVideoDecoderHeap(device_hwctx->video_device, &desc,
+             &IID_ID3D12VideoDecoderHeap, (void **)&ctx->decoder_heap));
+
+    return 0;
+
+fail:
+    if (ctx->decoder) {
+        av_log(avctx, AV_LOG_ERROR, "D3D12 doesn't support decoding frames with an extent "
+            "[width(%d), height(%d)], on your device!\n", frames_ctx->width, frames_ctx->height);
+    }
+
+    return AVERROR(EINVAL);
+}
+
+static int d3d12va_create_decoder(AVCodecContext *avctx)
+{
+    D3D12VADecodeContext   *ctx          = D3D12VA_DECODE_CONTEXT(avctx);
+    AVHWFramesContext      *frames_ctx   = D3D12VA_FRAMES_CONTEXT(avctx);
+    AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx;
+    AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
+
+    D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT feature = {
+        .NodeIndex     = 0,
+        .Configuration = ctx->cfg,
+        .Width         = frames_ctx->width,
+        .Height        = frames_ctx->height,
+        .DecodeFormat  = frames_hwctx->format,
+        .FrameRate     = { avctx->framerate.num, avctx->framerate.den },
+        .BitRate       = avctx->bit_rate,
+    };
+    D3D12_VIDEO_DECODER_DESC desc = {
+        .NodeMask      = 0,
+        .Configuration = ctx->cfg,
+    };
+
+    DX_CHECK(ID3D12VideoDevice_CheckFeatureSupport(device_hwctx->video_device, D3D12_FEATURE_VIDEO_DECODE_SUPPORT,
+                                                   &feature, sizeof(feature)));
+    if (!(feature.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED) ||
+        !(feature.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2)) {
+        av_log(avctx, AV_LOG_ERROR, "D3D12 decoder doesn't support on this device\n");
+        return AVERROR(EINVAL);
+    }
+
+    DX_CHECK(ID3D12VideoDevice_CreateVideoDecoder(device_hwctx->video_device, &desc, &IID_ID3D12VideoDecoder,
+                                                  (void **)&ctx->decoder));
+
+    ctx->decoder_ref = bufref_wrap_interface((IUnknown *)ctx->decoder);
+    if (!ctx->decoder_ref) {
+        D3D12_OBJECT_RELEASE(ctx->decoder); /* no buffer owns it yet, so drop it here */
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+
+fail:
+    return AVERROR(EINVAL);
+}
+
+int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
+{
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext *)hw_frames_ctx->data;
+
+    /* AV_PIX_FMT_YUV420P10 maps to P010; any other sw_pix_fmt falls back to NV12. */
+    frames_ctx->format    = AV_PIX_FMT_D3D12;
+    frames_ctx->sw_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ? AV_PIX_FMT_P010 : AV_PIX_FMT_NV12;
+    frames_ctx->width     = avctx->width;
+    frames_ctx->height    = avctx->height;
+
+    return 0;
+}
+
+int ff_d3d12va_decode_init(AVCodecContext *avctx)
+{
+    int ret;
+    AVHWFramesContext *frames_ctx;
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    ID3D12Resource *buffer = NULL;
+    ID3D12CommandAllocator *command_allocator = NULL;
+    D3D12_COMMAND_QUEUE_DESC queue_desc = {
+        .Type     = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
+        .Priority = 0,
+        .Flags    = D3D12_COMMAND_QUEUE_FLAG_NONE,
+        .NodeMask = 0,
+    };
+
+    ctx->pix_fmt = avctx->hwaccel->pix_fmt;
+
+    ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_D3D12VA);
+    if (ret < 0)
+        return ret;
+
+    frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
+    ctx->device_ctx = (AVD3D12VADeviceContext *)frames_ctx->device_ctx->hwctx;
+
+    if (frames_ctx->format != ctx->pix_fmt) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n");
+        goto fail;
+    }
+
+    ret = d3d12va_create_decoder(avctx);
+    if (ret < 0)
+        goto fail;
+
+    ret = d3d12va_create_decoder_heap(avctx);
+    if (ret < 0)
+        goto fail;
+
+    /* A negative size is an error code; never store it in the unsigned field. */
+    ret = ff_d3d12va_get_suitable_max_bitstream_size(avctx);
+    if (ret < 0)
+        goto fail;
+    ctx->bitstream_size = ret;
+
+    /* Allocate all bookkeeping, then check once; uninit frees whatever exists. */
+    ctx->ref_resources    = av_calloc(ctx->max_num_ref, sizeof(*ctx->ref_resources));
+    ctx->ref_subresources = av_calloc(ctx->max_num_ref, sizeof(*ctx->ref_subresources));
+    ctx->objects_queue    = av_fifo_alloc2(D3D12VA_VIDEO_DEC_ASYNC_DEPTH,
+                                           sizeof(HelperObjects), AV_FIFO_FLAG_AUTO_GROW);
+    if (!ctx->ref_resources || !ctx->ref_subresources || !ctx->objects_queue) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Create the queue first so a fence never exists without a queue to signal it on. */
+    DX_CHECK(ID3D12Device_CreateCommandQueue(ctx->device_ctx->device, &queue_desc,
+             &IID_ID3D12CommandQueue, (void **)&ctx->command_queue));
+    DX_CHECK(ID3D12Device_CreateFence(ctx->device_ctx->device, 0, D3D12_FENCE_FLAG_NONE,
+                                      &IID_ID3D12Fence, (void **)&ctx->sync_ctx.fence));
+
+    ctx->sync_ctx.event = CreateEvent(NULL, FALSE, FALSE, NULL);
+    if (!ctx->sync_ctx.event)
+        goto fail;
+
+    ret = d3d12va_get_valid_helper_objects(avctx, &command_allocator, &buffer);
+    if (ret < 0)
+        goto fail;
+
+    DX_CHECK(ID3D12Device_CreateCommandList(ctx->device_ctx->device, 0, queue_desc.Type,
+             command_allocator, NULL, &IID_ID3D12CommandList, (void **)&ctx->command_list));
+    DX_CHECK(ID3D12VideoDecodeCommandList_Close(ctx->command_list));
+    ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, (ID3D12CommandList **)&ctx->command_list);
+
+    ret = d3d12va_sync_with_gpu(avctx);
+    if (ret < 0)
+        goto fail;
+
+    /* Queued on success, released by the callee on failure: not ours either way. */
+    ret = d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value);
+    command_allocator = NULL;
+    buffer            = NULL;
+    if (ret < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    D3D12_OBJECT_RELEASE(command_allocator);
+    D3D12_OBJECT_RELEASE(buffer);
+    ff_d3d12va_decode_uninit(avctx);
+    return ret < 0 ? ret : AVERROR(EINVAL); /* some paths reach here with ret >= 0 */
+}
+
+int ff_d3d12va_decode_uninit(AVCodecContext *avctx)
+{
+    int num_allocator = 0;
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    HelperObjects obj;
+
+    /* Syncing signals the fence on the command queue, so both must exist (init may have failed early). */
+    if (ctx->sync_ctx.fence && ctx->command_queue)
+        d3d12va_sync_with_gpu(avctx);
+
+    av_freep(&ctx->ref_resources);
+    av_freep(&ctx->ref_subresources);
+
+    D3D12_OBJECT_RELEASE(ctx->command_list);
+    D3D12_OBJECT_RELEASE(ctx->command_queue);
+
+    if (ctx->objects_queue) {
+        while (av_fifo_read(ctx->objects_queue, &obj, 1) >= 0) {
+            num_allocator++;
+            D3D12_OBJECT_RELEASE(obj.buffer);
+            D3D12_OBJECT_RELEASE(obj.command_allocator);
+        }
+
+        av_log(avctx, AV_LOG_VERBOSE, "Total number of command allocators reused: %d\n", num_allocator);
+    }
+    av_fifo_freep2(&ctx->objects_queue);
+
+    D3D12_OBJECT_RELEASE(ctx->sync_ctx.fence);
+    if (ctx->sync_ctx.event)
+        CloseHandle(ctx->sync_ctx.event);
+    ctx->sync_ctx.event = NULL;
+
+    D3D12_OBJECT_RELEASE(ctx->decoder_heap);
+    av_buffer_unref(&ctx->decoder_ref);
+
+    return 0;
+}
+
+static inline int d3d12va_update_reference_frames_state(AVCodecContext *avctx, D3D12_RESOURCE_BARRIER *barriers, int state_before, int state_end)
+{
+    D3D12VADecodeContext   *ctx          = D3D12VA_DECODE_CONTEXT(avctx);
+    AVHWFramesContext      *frames_ctx   = D3D12VA_FRAMES_CONTEXT(avctx);
+    AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
+
+    int num_barrier = 0;
+    for (int i = 0; i < ctx->max_num_ref; i++) {
+        if (((ctx->used_mask >> i) & 0x1) && ctx->ref_resources[i]) {
+            barriers[num_barrier].Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+            barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barriers[num_barrier].Transition = (D3D12_RESOURCE_TRANSITION_BARRIER){
+                .pResource   = ctx->ref_resources[i],
+                .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
+                .StateBefore = state_before,
+                .StateAfter  = state_end,
+            };
+            num_barrier++;
+        }
+    }
+
+    return num_barrier;
+}
+
+int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
+                              const void *pp, unsigned pp_size,
+                              const void *qm, unsigned qm_size,
+                              int(*update_input_arguments)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *))
+{
+    int ret;
+    D3D12VADecodeContext   *ctx               = D3D12VA_DECODE_CONTEXT(avctx);
+    AVHWFramesContext      *frames_ctx        = D3D12VA_FRAMES_CONTEXT(avctx);
+    AVD3D12VAFramesContext *frames_hwctx      = frames_ctx->hwctx;
+    ID3D12Resource         *buffer            = NULL;
+    ID3D12CommandAllocator *command_allocator = NULL;
+    AVD3D12VAFrame         *f                 = (AVD3D12VAFrame *)frame->data[0];
+    ID3D12Resource         *resource          = (ID3D12Resource *)f->texture;
+
+    ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list;
+    D3D12_RESOURCE_BARRIER barriers[32] = { 0 };
+
+    D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS input_args = {
+        .NumFrameArguments = 2,
+        .FrameArguments = {
+            [0] = {
+                .Type  = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
+                .Size  = pp_size,
+                .pData = (void *)pp,
+            },
+            [1] = {
+                .Type  = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX,
+                .Size  = qm_size,
+                .pData = (void *)qm,
+            },
+        },
+        .pHeap = ctx->decoder_heap,
+    };
+
+    D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = {
+        .ConversionArguments = { 0 },
+        .OutputSubresource   = 0,
+        .pOutputTexture2D    = resource,
+    };
+
+    UINT num_barrier = 1;
+    barriers[0] = (D3D12_RESOURCE_BARRIER) {
+        .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+        .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+        .Transition = {
+            .pResource   = resource,
+            .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
+            .StateBefore = D3D12_RESOURCE_STATE_COMMON,
+            .StateAfter  = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
+        },
+    };
+
+    memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
+    input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
+    input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_resources;
+    input_args.ReferenceFrames.pSubresources = ctx->ref_subresources;
+
+    ret = d3d12va_fence_completion(&f->sync_ctx);
+    if (ret < 0)
+        goto fail;
+
+    if (!qm)
+        input_args.NumFrameArguments = 1;
+
+    ret = d3d12va_get_valid_helper_objects(avctx, &command_allocator, &buffer);
+    if (ret < 0)
+        goto fail;
+
+    ret = update_input_arguments(avctx, &input_args, buffer);
+    if (ret < 0)
+        goto fail;
+
+    DX_CHECK(ID3D12CommandAllocator_Reset(command_allocator));
+
+    DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator));
+
+    num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1], D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
+
+    ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers);
+
+    ID3D12VideoDecodeCommandList_DecodeFrame(cmd_list, ctx->decoder, &output_args, &input_args);
+
+    barriers[0].Transition.StateBefore = barriers[0].Transition.StateAfter;
+    barriers[0].Transition.StateAfter  = D3D12_RESOURCE_STATE_COMMON;
+    d3d12va_update_reference_frames_state(avctx, &barriers[1], D3D12_RESOURCE_STATE_VIDEO_DECODE_READ, D3D12_RESOURCE_STATE_COMMON);
+
+    ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers);
+
+    DX_CHECK(ID3D12VideoDecodeCommandList_Close(cmd_list));
+
+    ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, (ID3D12CommandList **)&ctx->command_list);
+
+    DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, f->sync_ctx.fence, ++f->sync_ctx.fence_value));
+
+    DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx.fence, ++ctx->sync_ctx.fence_value));
+
+    ret = d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+
+fail:
+    if (command_allocator)
+        d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value);
+    return AVERROR(EINVAL);
+}
diff --git a/libavcodec/d3d12va_decode.h b/libavcodec/d3d12va_decode.h
new file mode 100644
index 0000000000..b64994760a
--- /dev/null
+++ b/libavcodec/d3d12va_decode.h
@@ -0,0 +1,179 @@
+/*
+ * Direct3D 12 HW acceleration video decoder
+ *
+ * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_D3D12VA_DECODE_H
+#define AVCODEC_D3D12VA_DECODE_H
+
+#include "libavutil/fifo.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_d3d12va.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "hwaccel_internal.h"
+
+/**
+ * @brief This structure is used to provide the necessary configurations and data
+ * to the FFmpeg Direct3D 12 HWAccel implementation for video decoder.
+ */
+typedef struct D3D12VADecodeContext {
+    AVBufferRef *decoder_ref;
+
+    /**
+     * D3D12 video decoder
+     */
+    ID3D12VideoDecoder *decoder;
+
+    /**
+     * D3D12 video decoder heap
+     */
+    ID3D12VideoDecoderHeap *decoder_heap;
+
+    /**
+     * D3D12 configuration used to create the decoder
+     *
+     * Specified by decoders
+     */
+    D3D12_VIDEO_DECODE_CONFIGURATION cfg;
+
+    /**
+     * A cached queue for reusing the D3D12 command allocators and upload buffers
+     *
+     * @see https://learn.microsoft.com/en-us/windows/win32/direct3d12/recording-command-lists-and-bundles#id3d12commandallocator
+     */
+    AVFifo *objects_queue;
+
+    /**
+     * D3D12 command queue
+     */
+    ID3D12CommandQueue *command_queue;
+
+    /**
+     * D3D12 video decode command list
+     */
+    ID3D12VideoDecodeCommandList *command_list;
+
+    /**
+     * The array of resources used for reference frames
+     *
+     * The ref_resources.length is the same as D3D12VADecodeContext.max_num_ref
+     */
+    ID3D12Resource **ref_resources;
+
+    /**
+     * The array of subresources used for reference frames
+     *
+     * The ref_subresources.length is the same as D3D12VADecodeContext.max_num_ref
+     */
+    UINT *ref_subresources;
+
+    /**
+     * Maximum number of reference frames
+     */
+    UINT max_num_ref;
+
+    /**
+     * Bit mask of the ref_resources slots referenced while decoding the current frame
+     */
+    UINT used_mask;
+
+    /**
+     * Bitstream size for each frame
+     */
+    UINT bitstream_size;
+
+    /**
+     * The sync context used to sync command queue
+     */
+    AVD3D12VASyncContext sync_ctx;
+
+    /**
+     * A pointer to AVD3D12VADeviceContext used to create D3D12 objects
+     */
+    AVD3D12VADeviceContext *device_ctx;
+
+    /**
+     * Pixel format
+     */
+    enum AVPixelFormat pix_fmt;
+
+    /**
+     * Private to the FFmpeg AVHWAccel implementation
+     */
+    unsigned report_id;
+} D3D12VADecodeContext;
+
+/**
+ * @}
+ */
+#define D3D12VA_VIDEO_DEC_ASYNC_DEPTH 36
+#define D3D12VA_DECODE_CONTEXT(avctx) ((D3D12VADecodeContext *)((avctx)->internal->hwaccel_priv_data))
+#define D3D12VA_FRAMES_CONTEXT(avctx) ((AVHWFramesContext *)(avctx)->hw_frames_ctx->data)
+
+/**
+ * @brief Get a suitable maximum bitstream size
+ *
+ * Creating and destroying a resource on d3d12 needs sync and reallocation, so use this function
+ * to help allocate a big enough bitstream buffer to avoid recreating resources when decoding.
+ *
+ * @return the suitable size
+ */
+int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx);
+
+/**
+ * @brief init D3D12VADecodeContext
+ *
+ * @return Error code (ret < 0 if failed)
+ */
+int ff_d3d12va_decode_init(AVCodecContext *avctx);
+
+/**
+ * @brief uninit D3D12VADecodeContext
+ *
+ * @return Error code (ret < 0 if failed)
+ */
+int ff_d3d12va_decode_uninit(AVCodecContext *avctx);
+
+/**
+ * @brief d3d12va common frame params
+ *
+ * @return Error code (ret < 0 if failed)
+ */
+int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
+
+/**
+ * @brief d3d12va common end frame
+ *
+ * @param avctx    codec context
+ * @param frame    current output frame
+ * @param pp       picture parameters
+ * @param pp_size  the size of the picture parameters
+ * @param qm       quantization matrix
+ * @param qm_size  the size of the quantization matrix
+ * @param callback update decoder-specified input stream arguments
+ * @return Error code (ret < 0 if failed)
+ */
+int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
+    const void *pp, unsigned pp_size,
+    const void *qm, unsigned qm_size,
+    int(*)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *));
+
+#endif /* AVCODEC_D3D12VA_DECODE_H */
diff --git a/libavcodec/d3d12va_h264.c b/libavcodec/d3d12va_h264.c
new file mode 100644
index 0000000000..5a6d6852c8
--- /dev/null
+++ b/libavcodec/d3d12va_h264.c
@@ -0,0 +1,207 @@
+/*
+ * Direct3D 12 h264 HW acceleration
+ *
+ * copyright (c) 2022-2023 Wu Jianhua <toqsxw@outlook.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+#include "libavutil/avassert.h"
+#include "h264dec.h"
+#include "h264data.h"
+#include "h264_ps.h"
+#include "mpegutils.h"
+#include "dxva2_internal.h"
+#include "d3d12va_decode.h"
+#include "libavutil/hwcontext_d3d12va_internal.h"
+#include <dxva.h>
+
+typedef struct H264DecodePictureContext {
+    DXVA_PicParams_H264   pp;
+    DXVA_Qmatrix_H264     qm;
+    unsigned              slice_count;
+    DXVA_Slice_H264_Short slice_short[MAX_SLICES];
+    const uint8_t         *bitstream;
+    unsigned              bitstream_size;
+} H264DecodePictureContext;
+
+static void fill_slice_short(DXVA_Slice_H264_Short *slice,
+                             unsigned position, unsigned size)
+{
+    memset(slice, 0, sizeof(*slice));
+    slice->BSNALunitDataLocation = position;
+    slice->SliceBytesInBuffer    = size;
+    slice->wBadSliceChopping     = 0;
+}
+
+static int d3d12va_h264_start_frame(AVCodecContext *avctx,
+                                  av_unused const uint8_t *buffer,
+                                  av_unused uint32_t size)
+{
+    const H264Context        *h       = avctx->priv_data;
+    H264DecodePictureContext *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private;
+    D3D12VADecodeContext     *ctx     = D3D12VA_DECODE_CONTEXT(avctx);
+
+    if (!ctx)
+        return -1;
+
+    assert(ctx_pic);
+
+    ctx->used_mask = 0;
+
+    ff_dxva2_h264_fill_picture_parameters(avctx, (AVDXVAContext *)ctx, &ctx_pic->pp);
+
+    ff_dxva2_h264_fill_scaling_lists(avctx, (AVDXVAContext *)ctx, &ctx_pic->qm);
+
+    ctx_pic->slice_count    = 0;
+    ctx_pic->bitstream_size = 0;
+    ctx_pic->bitstream      = NULL;
+
+    return 0;
+}
+
+static int d3d12va_h264_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    unsigned position;
+    const H264Context        *h               = avctx->priv_data;
+    const H264SliceContext   *sl              = &h->slice_ctx[0];
+    const H264Picture        *current_picture = h->cur_pic_ptr;
+    H264DecodePictureContext *ctx_pic         = current_picture->hwaccel_picture_private;
+
+    if (ctx_pic->slice_count >= MAX_SLICES)
+        return AVERROR(ERANGE);
+
+    if (!ctx_pic->bitstream)
+        ctx_pic->bitstream = buffer;
+    ctx_pic->bitstream_size += size;
+
+    position = buffer - ctx_pic->bitstream;
+    fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], position, size);
+    ctx_pic->slice_count++;
+
+    if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI)
+        ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */
+
+    return 0;
+}
+
+/* H.264 start code prefix (00 00 01) written in front of every slice in the upload buffer. */
+static const uint8_t start_code[] = { 0, 0, 1 };
+static int update_input_arguments(AVCodecContext *avctx, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *input_args, ID3D12Resource *buffer)
+{
+    D3D12VADecodeContext     *ctx     = D3D12VA_DECODE_CONTEXT(avctx);
+    const H264Context        *h       = avctx->priv_data;
+    H264DecodePictureContext *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private;
+
+    int i;
+    void *mapped = NULL;
+    uint8_t *mapped_data, *mapped_ptr;
+    D3D12_VIDEO_DECODE_FRAME_ARGUMENT *args;
+
+    /* Each slice grows by one start code; never write past the end of the upload buffer. */
+    if ((uint64_t)ctx_pic->bitstream_size + ctx_pic->slice_count * sizeof(start_code) > ctx->bitstream_size) {
+        av_log(avctx, AV_LOG_ERROR, "Bitstream does not fit into the D3D12 upload buffer!\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (FAILED(ID3D12Resource_Map(buffer, 0, NULL, &mapped))) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to map D3D12 Buffer resource!\n");
+        return AVERROR(EINVAL);
+    }
+
+    mapped_ptr = mapped_data = mapped;
+    for (i = 0; i < ctx_pic->slice_count; i++) {
+        DXVA_Slice_H264_Short *slice = &ctx_pic->slice_short[i];
+        UINT position = slice->BSNALunitDataLocation;
+        UINT size     = slice->SliceBytesInBuffer;
+
+        /* Re-point the slice control entry at its new location in the upload buffer. */
+        slice->SliceBytesInBuffer   += sizeof(start_code);
+        slice->BSNALunitDataLocation = mapped_ptr - mapped_data;
+
+        memcpy(mapped_ptr, start_code, sizeof(start_code));
+        mapped_ptr += sizeof(start_code);
+        memcpy(mapped_ptr, &ctx_pic->bitstream[position], size);
+        mapped_ptr += size;
+    }
+
+    ID3D12Resource_Unmap(buffer, 0, NULL);
+
+    input_args->CompressedBitstream = (D3D12_VIDEO_DECODE_COMPRESSED_BITSTREAM){
+        .pBuffer = buffer,
+        .Offset  = 0,
+        .Size    = mapped_ptr - mapped_data,
+    };
+
+    args = &input_args->FrameArguments[input_args->NumFrameArguments++];
+    args->Type  = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL;
+    args->Size  = sizeof(DXVA_Slice_H264_Short) * ctx_pic->slice_count;
+    args->pData = ctx_pic->slice_short;
+
+    return 0;
+}
+
+static int d3d12va_h264_end_frame(AVCodecContext *avctx)
+{
+    H264Context               *h       = avctx->priv_data;
+    H264DecodePictureContext  *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private;
+    H264SliceContext          *sl      = &h->slice_ctx[0];
+
+    int ret;
+
+    if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0)
+        return -1;
+
+    ret = ff_d3d12va_common_end_frame(avctx, h->cur_pic_ptr->f,
+                                      &ctx_pic->pp, sizeof(ctx_pic->pp),
+                                      &ctx_pic->qm, sizeof(ctx_pic->qm),
+                                      update_input_arguments);
+    if (!ret)
+        ff_h264_draw_horiz_band(h, sl, 0, h->avctx->height);
+
+    return ret;
+}
+
+static int d3d12va_h264_decode_init(AVCodecContext *avctx)
+{
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    DXVA_PicParams_H264 pp;
+
+    ctx->cfg.DecodeProfile = D3D12_VIDEO_DECODE_PROFILE_H264;
+
+    ctx->max_num_ref = FF_ARRAY_ELEMS(pp.RefFrameList) + 1;
+
+    return ff_d3d12va_decode_init(avctx);
+}
+
+#if CONFIG_H264_D3D12VA_HWACCEL
+const FFHWAccel ff_h264_d3d12va_hwaccel = {
+    .p.name               = "h264_d3d12va",
+    .p.type               = AVMEDIA_TYPE_VIDEO,
+    .p.id                 = AV_CODEC_ID_H264,
+    .p.pix_fmt            = AV_PIX_FMT_D3D12,
+    .init                 = d3d12va_h264_decode_init,
+    .uninit               = ff_d3d12va_decode_uninit,
+    .start_frame          = d3d12va_h264_start_frame,
+    .decode_slice         = d3d12va_h264_decode_slice,
+    .end_frame            = d3d12va_h264_end_frame,
+    .frame_params         = ff_d3d12va_common_frame_params,
+    .frame_priv_data_size = sizeof(H264DecodePictureContext),
+    .priv_data_size       = sizeof(D3D12VADecodeContext),
+};
+#endif
diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index d7bc587562..7160a0008b 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -768,12 +768,17 @@ static void *get_surface(const AVCodecContext *avctx, const AVFrame *frame)
 }
 
 unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx,
-                                    const AVDXVAContext *ctx,
-                                    const AVFrame *frame)
+                                    AVDXVAContext *ctx, const AVFrame *frame,
+                                    int curr)
 {
     void *surface = get_surface(avctx, frame);
     unsigned i;
 
+#if CONFIG_D3D12VA
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D12) {
+        return ff_d3d12va_get_surface_index(avctx, (D3D12VADecodeContext *)ctx, frame, curr);
+    }
+#endif
 #if CONFIG_D3D11VA
     if (avctx->pix_fmt == AV_PIX_FMT_D3D11)
         return (intptr_t)frame->data[1];
@@ -1056,3 +1061,23 @@ int ff_dxva2_is_d3d11(const AVCodecContext *avctx)
     else
         return 0;
 }
+
+/* Address of the active backend's report_id, or NULL if pix_fmt matches no built backend. */
+unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, AVDXVAContext *ctx)
+{
+    /* Precedence: DXVA2, then D3D11VA, then D3D12VA. */
+#if CONFIG_DXVA2
+    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
+        return &ctx->dxva2.report_id;
+#endif
+#if CONFIG_D3D11VA
+    if (ff_dxva2_is_d3d11(avctx))
+        return &ctx->d3d11va.report_id;
+#endif
+#if CONFIG_D3D12VA
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D12)
+        return &ctx->d3d12va.report_id;
+#endif
+
+    return NULL;
+}
diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h
index 22c93992f2..bdec6112e9 100644
--- a/libavcodec/dxva2.h
+++ b/libavcodec/dxva2.h
@@ -45,9 +45,6 @@
  * @{
  */
 
-#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2 and old UVD/UVD+ ATI video cards
-#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work around for DXVA2 and old Intel GPUs with ClearVideo interface
-
 /**
  * This structure is used to provides the necessary configurations and data
  * to the DXVA2 FFmpeg HWAccel implementation.
diff --git a/libavcodec/dxva2_av1.c b/libavcodec/dxva2_av1.c
index ab118a4356..12a895b791 100644
--- a/libavcodec/dxva2_av1.c
+++ b/libavcodec/dxva2_av1.c
@@ -75,7 +75,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c
     pp->max_width  = seq->max_frame_width_minus_1 + 1;
     pp->max_height = seq->max_frame_height_minus_1 + 1;
 
-    pp->CurrPicTextureIndex = ff_dxva2_get_surface_index(avctx, ctx, h->cur_frame.f);
+    pp->CurrPicTextureIndex = ff_dxva2_get_surface_index(avctx, ctx, h->cur_frame.f, 1);
     pp->superres_denom      = frame_header->use_superres ? frame_header->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM;
     pp->bitdepth            = get_bit_depth_from_seq(seq);
     pp->seq_profile         = seq->seq_profile;
@@ -151,7 +151,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c
     for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
         AVFrame *ref_frame = h->ref[i].f;
         if (ref_frame->buf[0])
-            pp->RefFrameMapTextureIndex[i] = ff_dxva2_get_surface_index(avctx, ctx, ref_frame);
+            pp->RefFrameMapTextureIndex[i] = ff_dxva2_get_surface_index(avctx, ctx, ref_frame, 0);
     }
 
     /* Loop filter parameters */
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index 20e64f848d..e0ec4878a7 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -48,19 +48,16 @@ static void fill_picture_entry(DXVA_PicEntry_H264 *pic,
     pic->bPicEntry = index | (flag << 7);
 }
 
-static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, const H264Context *h,
+void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx,
                                     DXVA_PicParams_H264 *pp)
 {
+    const H264Context *h               = avctx->priv_data;
     const H264Picture *current_picture = h->cur_pic_ptr;
     const SPS *sps = h->ps.sps;
     const PPS *pps = h->ps.pps;
     int i, j;
 
     memset(pp, 0, sizeof(*pp));
-    /* Configure current picture */
-    fill_picture_entry(&pp->CurrPic,
-                       ff_dxva2_get_surface_index(avctx, ctx, current_picture->f),
-                       h->picture_structure == PICT_BOTTOM_FIELD);
     /* Configure the set of references */
     pp->UsedForReferenceFlags  = 0;
     pp->NonExistingFrameFlags  = 0;
@@ -75,7 +72,7 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *
         }
         if (r) {
             fill_picture_entry(&pp->RefFrameList[i],
-                               ff_dxva2_get_surface_index(avctx, ctx, r->f),
+                               ff_dxva2_get_surface_index(avctx, ctx, r->f, 0),
                                r->long_ref != 0);
 
             if ((r->reference & PICT_TOP_FIELD) && r->field_poc[0] != INT_MAX)
@@ -95,6 +92,10 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *
             pp->FrameNumList[i]           = 0;
         }
     }
+    /* Configure current picture */
+    fill_picture_entry(&pp->CurrPic,
+                       ff_dxva2_get_surface_index(avctx, ctx, current_picture->f, 1),
+                       h->picture_structure == PICT_BOTTOM_FIELD);
 
     pp->wFrameWidthInMbsMinus1        = h->mb_width  - 1;
     pp->wFrameHeightInMbsMinus1       = h->mb_height - 1;
@@ -164,9 +165,10 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *
     //pp->SliceGroupMap[810];               /* XXX not implemented by FFmpeg */
 }
 
-static void fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, const H264Context *h, DXVA_Qmatrix_H264 *qm)
+void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm)
 {
-    const PPS *pps = h->ps.pps;
+    const H264Context *h   = avctx->priv_data;
+    const PPS         *pps = h->ps.pps;
     unsigned i, j;
     memset(qm, 0, sizeof(*qm));
     if (DXVA_CONTEXT_WORKAROUND(avctx, ctx) & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) {
@@ -253,9 +255,9 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
                 unsigned plane;
                 unsigned index;
                 if (DXVA_CONTEXT_WORKAROUND(avctx, ctx) & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO)
-                    index = ff_dxva2_get_surface_index(avctx, ctx, r->f);
+                    index = ff_dxva2_get_surface_index(avctx, ctx, r->f, 0);
                 else
-                    index = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, r->f));
+                    index = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, r->f, 0));
                 fill_picture_entry(&slice->RefPicList[list][i], index,
                                    sl->ref_list[list][i].reference == PICT_BOTTOM_FIELD);
                 for (plane = 0; plane < 3; plane++) {
@@ -454,10 +456,10 @@ static int dxva2_h264_start_frame(AVCodecContext *avctx,
     assert(ctx_pic);
 
     /* Fill up DXVA_PicParams_H264 */
-    fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp);
+    ff_dxva2_h264_fill_picture_parameters(avctx, ctx, &ctx_pic->pp);
 
     /* Fill up DXVA_Qmatrix_H264 */
-    fill_scaling_lists(avctx, ctx, h, &ctx_pic->qm);
+    ff_dxva2_h264_fill_scaling_lists(avctx, ctx, &ctx_pic->qm);
 
     ctx_pic->slice_count    = 0;
     ctx_pic->bitstream_size = 0;
diff --git a/libavcodec/dxva2_hevc.c b/libavcodec/dxva2_hevc.c
index b6c08943f0..9e1d081412 100644
--- a/libavcodec/dxva2_hevc.c
+++ b/libavcodec/dxva2_hevc.c
@@ -79,7 +79,7 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *
                                       (0                                  << 14) |
                                       (0                                  << 15);
 
-    fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, current_picture->frame), 0);
+    fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, current_picture->frame, 1), 0);
 
     pp->sps_max_dec_pic_buffering_minus1         = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1;
     pp->log2_min_luma_coding_block_size_minus3   = sps->log2_min_cb_size - 3;
@@ -171,7 +171,7 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *
         }
 
         if (frame) {
-            fill_picture_entry(&pp->RefPicList[i], ff_dxva2_get_surface_index(avctx, ctx, frame->frame), !!(frame->flags & HEVC_FRAME_FLAG_LONG_REF));
+            fill_picture_entry(&pp->RefPicList[i], ff_dxva2_get_surface_index(avctx, ctx, frame->frame, 0), !!(frame->flags & HEVC_FRAME_FLAG_LONG_REF));
             pp->PicOrderCntValList[i] = frame->poc;
         } else {
             pp->RefPicList[i].bPicEntry = 0xff;
@@ -186,7 +186,7 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *
             while (!frame && j < rpl->nb_refs) \
                 frame = rpl->ref[j++]; \
             if (frame && frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF)) \
-                pp->ref_list[i] = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, frame->frame)); \
+                pp->ref_list[i] = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, frame->frame, 0)); \
             else \
                 pp->ref_list[i] = 0xff; \
         } \
diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
index b822af59cd..73bb954c6c 100644
--- a/libavcodec/dxva2_internal.h
+++ b/libavcodec/dxva2_internal.h
@@ -26,18 +26,34 @@
 #define COBJMACROS
 
 #include "config.h"
+#include "config_components.h"
 
 /* define the proper COM entries before forcing desktop APIs */
 #include <objbase.h>
 
+#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2/Direct3D11 and old UVD/UVD+ ATI video cards
+#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work around for DXVA2/Direct3D11 and old Intel GPUs with ClearVideo interface
+
 #if CONFIG_DXVA2
 #include "dxva2.h"
 #include "libavutil/hwcontext_dxva2.h"
+#define DXVA2_VAR(ctx, var) ctx->dxva2.var
+#else
+#define DXVA2_VAR(ctx, var) 0
 #endif
+/* DXVA2_VAR/D3D11VA_VAR read a member of that backend's context, or expand to 0 when the backend is not built. */
 #if CONFIG_D3D11VA
 #include "d3d11va.h"
 #include "libavutil/hwcontext_d3d11va.h"
+#define D3D11VA_VAR(ctx, var) ctx->d3d11va.var
+#else
+#define D3D11VA_VAR(ctx, var) 0
 #endif
+
+#if CONFIG_D3D12VA
+#include "d3d12va_decode.h"
+#endif
+
 #if HAVE_DXVA_H
 /* When targeting WINAPI_FAMILY_PHONE_APP or WINAPI_FAMILY_APP, dxva.h
  * defines nothing. Force the struct definitions to be visible. */
@@ -62,6 +78,9 @@ typedef union {
 #if CONFIG_DXVA2
     struct dxva_context      dxva2;
 #endif
+#if CONFIG_D3D12VA
+    struct D3D12VADecodeContext d3d12va;
+#endif
 } AVDXVAContext;
 
 typedef struct FFDXVASharedContext {
@@ -101,43 +120,28 @@ typedef struct FFDXVASharedContext {
 #define D3D11VA_CONTEXT(ctx) (&ctx->d3d11va)
 #define DXVA2_CONTEXT(ctx)   (&ctx->dxva2)
 
-#if CONFIG_D3D11VA && CONFIG_DXVA2
-#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.workaround : ctx->dxva2.workaround)
-#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.surface_count : ctx->dxva2.surface_count)
-#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.decoder : (void *)ctx->dxva2.decoder)
-#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(ff_dxva2_is_d3d11(avctx) ? &ctx->d3d11va.report_id : &ctx->dxva2.report_id))
-#define DXVA_CONTEXT_CFG(avctx, ctx)            (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.cfg : (void *)ctx->dxva2.cfg)
-#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigBitstreamRaw : ctx->dxva2.cfg->ConfigBitstreamRaw)
-#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigIntraResidUnsigned : ctx->dxva2.cfg->ConfigIntraResidUnsigned)
-#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigResidDiffAccelerator : ctx->dxva2.cfg->ConfigResidDiffAccelerator)
+#define DXVA2_CONTEXT_VAR(avctx, ctx, var) (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? D3D11VA_VAR(ctx, var) : DXVA2_VAR(ctx, var)))
+/* Backend accessors: 0 for AV_PIX_FMT_D3D12 (except REPORT_ID), else the D3D11VA or DXVA2 member chosen by ff_dxva2_is_d3d11(). */
+#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*ff_dxva2_get_report_id(avctx, ctx))
+#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     DXVA2_CONTEXT_VAR(avctx, ctx, workaround)
+#define DXVA_CONTEXT_COUNT(avctx, ctx)          DXVA2_CONTEXT_VAR(avctx, ctx, surface_count)
+#define DXVA_CONTEXT_DECODER(avctx, ctx)        (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void *)D3D11VA_VAR(ctx, decoder) : (void *)DXVA2_VAR(ctx, decoder)))
+#define DXVA_CONTEXT_CFG(avctx, ctx)            (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void *)D3D11VA_VAR(ctx, cfg) : (void *)DXVA2_VAR(ctx, cfg)))
+#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigBitstreamRaw)
+#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigIntraResidUnsigned)
+#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigResidDiffAccelerator)
 #define DXVA_CONTEXT_VALID(avctx, ctx)          (DXVA_CONTEXT_DECODER(avctx, ctx) && \
                                                  DXVA_CONTEXT_CFG(avctx, ctx)     && \
-                                                 (ff_dxva2_is_d3d11(avctx) || ctx->dxva2.surface_count))
-#elif CONFIG_DXVA2
-#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ctx->dxva2.workaround)
-#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ctx->dxva2.surface_count)
-#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ctx->dxva2.decoder)
-#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(&ctx->dxva2.report_id))
-#define DXVA_CONTEXT_CFG(avctx, ctx)            (ctx->dxva2.cfg)
-#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ctx->dxva2.cfg->ConfigBitstreamRaw)
-#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->dxva2.cfg->ConfigIntraResidUnsigned)
-#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->dxva2.cfg->ConfigResidDiffAccelerator)
-#define DXVA_CONTEXT_VALID(avctx, ctx)          (ctx->dxva2.decoder && ctx->dxva2.cfg && ctx->dxva2.surface_count)
-#elif CONFIG_D3D11VA
-#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ctx->d3d11va.workaround)
-#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ctx->d3d11va.surface_count)
-#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ctx->d3d11va.decoder)
-#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(&ctx->d3d11va.report_id))
-#define DXVA_CONTEXT_CFG(avctx, ctx)            (ctx->d3d11va.cfg)
-#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ctx->d3d11va.cfg->ConfigBitstreamRaw)
-#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->d3d11va.cfg->ConfigIntraResidUnsigned)
-#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->d3d11va.cfg->ConfigResidDiffAccelerator)
-#define DXVA_CONTEXT_VALID(avctx, ctx)          (ctx->d3d11va.decoder && ctx->d3d11va.cfg)
+                                                 (ff_dxva2_is_d3d11(avctx) || DXVA2_VAR(ctx, surface_count)))
+
+#if CONFIG_D3D12VA
+unsigned ff_d3d12va_get_surface_index(const AVCodecContext *avctx,
+                                      D3D12VADecodeContext *ctx, const AVFrame *frame,
+                                      int curr);
 #endif
 
 unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx,
-                                    const AVDXVAContext *,
-                                    const AVFrame *frame);
+                                    AVDXVAContext *, const AVFrame *frame, int curr);
 
 int ff_dxva2_commit_buffer(AVCodecContext *, AVDXVAContext *,
                            DECODER_BUFFER_DESC *,
@@ -161,4 +165,10 @@ int ff_dxva2_common_frame_params(AVCodecContext *avctx,
 
 int ff_dxva2_is_d3d11(const AVCodecContext *avctx);
 
+unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, AVDXVAContext *ctx);
+
+void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_PicParams_H264 *pp);
+
+void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm);
+
 #endif /* AVCODEC_DXVA2_INTERNAL_H */
diff --git a/libavcodec/dxva2_mpeg2.c b/libavcodec/dxva2_mpeg2.c
index 75c416654f..b739dc1e54 100644
--- a/libavcodec/dxva2_mpeg2.c
+++ b/libavcodec/dxva2_mpeg2.c
@@ -49,14 +49,14 @@ static void fill_picture_parameters(AVCodecContext *avctx,
     int is_field = s->picture_structure != PICT_FRAME;
 
     memset(pp, 0, sizeof(*pp));
-    pp->wDecodedPictureIndex         = ff_dxva2_get_surface_index(avctx, ctx, current_picture->f);
+    pp->wDecodedPictureIndex         = ff_dxva2_get_surface_index(avctx, ctx, current_picture->f, 1);
     pp->wDeblockedPictureIndex       = 0;
     if (s->pict_type != AV_PICTURE_TYPE_I)
-        pp->wForwardRefPictureIndex  = ff_dxva2_get_surface_index(avctx, ctx, s->last_picture.f);
+        pp->wForwardRefPictureIndex  = ff_dxva2_get_surface_index(avctx, ctx, s->last_picture.f, 0);
     else
         pp->wForwardRefPictureIndex  = 0xffff;
     if (s->pict_type == AV_PICTURE_TYPE_B)
-        pp->wBackwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->next_picture.f);
+        pp->wBackwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->next_picture.f, 0);
     else
         pp->wBackwardRefPictureIndex = 0xffff;
     pp->wPicWidthInMBminus1          = s->mb_width  - 1;
diff --git a/libavcodec/dxva2_vc1.c b/libavcodec/dxva2_vc1.c
index b35fb115f7..8ee23feabf 100644
--- a/libavcodec/dxva2_vc1.c
+++ b/libavcodec/dxva2_vc1.c
@@ -58,13 +58,13 @@ static void fill_picture_parameters(AVCodecContext *avctx,
 
     memset(pp, 0, sizeof(*pp));
     pp->wDecodedPictureIndex    =
-    pp->wDeblockedPictureIndex  = ff_dxva2_get_surface_index(avctx, ctx, current_picture->f);
+    pp->wDeblockedPictureIndex  = ff_dxva2_get_surface_index(avctx, ctx, current_picture->f, 1);
     if (s->pict_type != AV_PICTURE_TYPE_I && !v->bi_type)
-        pp->wForwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->last_picture.f);
+        pp->wForwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->last_picture.f, 0);
     else
         pp->wForwardRefPictureIndex = 0xffff;
     if (s->pict_type == AV_PICTURE_TYPE_B && !v->bi_type)
-        pp->wBackwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->next_picture.f);
+        pp->wBackwardRefPictureIndex = ff_dxva2_get_surface_index(avctx, ctx, s->next_picture.f, 0);
     else
         pp->wBackwardRefPictureIndex = 0xffff;
     if (v->profile == PROFILE_ADVANCED) {
diff --git a/libavcodec/dxva2_vp9.c b/libavcodec/dxva2_vp9.c
index eba4df9031..21699eb3f4 100644
--- a/libavcodec/dxva2_vp9.c
+++ b/libavcodec/dxva2_vp9.c
@@ -54,7 +54,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c
 
     memset(pp, 0, sizeof(*pp));
 
-    fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, h->frames[CUR_FRAME].tf.f), 0);
+    fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, h->frames[CUR_FRAME].tf.f, 1), 0);
 
     pp->profile = h->h.profile;
     pp->wFormatAndPictureInfoFlags = ((h->h.keyframe == 0)   <<  0) |
@@ -81,7 +81,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c
 
     for (i = 0; i < 8; i++) {
         if (h->refs[i].f->buf[0]) {
-            fill_picture_entry(&pp->ref_frame_map[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[i].f), 0);
+            fill_picture_entry(&pp->ref_frame_map[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[i].f, 0), 0);
             pp->ref_frame_coded_width[i]  = h->refs[i].f->width;
             pp->ref_frame_coded_height[i] = h->refs[i].f->height;
         } else
@@ -91,7 +91,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c
     for (i = 0; i < 3; i++) {
         uint8_t refidx = h->h.refidx[i];
         if (h->refs[refidx].f->buf[0])
-            fill_picture_entry(&pp->frame_refs[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[refidx].f), 0);
+            fill_picture_entry(&pp->frame_refs[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[refidx].f, 0), 0);
         else
             pp->frame_refs[i].bPicEntry = 0xFF;
 
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 3b6c6c3592..8464a0b34c 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -784,6 +784,7 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
 {
 #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \
                      (CONFIG_H264_D3D11VA_HWACCEL * 2) + \
+                     CONFIG_H264_D3D12VA_HWACCEL + \
                      CONFIG_H264_NVDEC_HWACCEL + \
                      CONFIG_H264_VAAPI_HWACCEL + \
                      CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \
@@ -887,6 +888,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
             *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
             *fmt++ = AV_PIX_FMT_D3D11;
 #endif
+#if CONFIG_H264_D3D12VA_HWACCEL
+            *fmt++ = AV_PIX_FMT_D3D12;
+#endif
 #if CONFIG_H264_VAAPI_HWACCEL
             *fmt++ = AV_PIX_FMT_VAAPI;
 #endif
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 0bf05b2cfe..9f5893c512 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -1131,6 +1131,9 @@ const FFCodec ff_h264_decoder = {
 #if CONFIG_H264_D3D11VA2_HWACCEL
                                HWACCEL_D3D11VA2(h264),
 #endif
+#if CONFIG_H264_D3D12VA_HWACCEL
+                               HWACCEL_D3D12VA(h264),
+#endif
 #if CONFIG_H264_NVDEC_HWACCEL
                                HWACCEL_NVDEC(h264),
 #endif
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index c4630718cf..90b8beb0f5 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -30,6 +30,7 @@ extern const struct FFHWAccel ff_h263_vaapi_hwaccel;
 extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_h264_d3d11va_hwaccel;
 extern const struct FFHWAccel ff_h264_d3d11va2_hwaccel;
+extern const struct FFHWAccel ff_h264_d3d12va_hwaccel;
 extern const struct FFHWAccel ff_h264_dxva2_hwaccel;
 extern const struct FFHWAccel ff_h264_nvdec_hwaccel;
 extern const struct FFHWAccel ff_h264_vaapi_hwaccel;
diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
index e164722a94..ee29ca631d 100644
--- a/libavcodec/hwconfig.h
+++ b/libavcodec/hwconfig.h
@@ -77,6 +77,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx);
     HW_CONFIG_HWACCEL(1, 1, 1, VULKAN,       VULKAN,       ff_ ## codec ## _vulkan_hwaccel)
 #define HWACCEL_D3D11VA(codec) \
     HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD,  NONE,         ff_ ## codec ## _d3d11va_hwaccel)
+#define HWACCEL_D3D12VA(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 0, D3D12,        D3D12VA,      ff_ ## codec ## _d3d12va_hwaccel)
 
 #define HW_CONFIG_ENCODER(device, frames, ad_hoc, format, device_type_) \
     &(const AVCodecHWConfigInternal) { \