ffmpeg/libavcodec/vulkan/ffv1_enc_setup.comp

153 lines
4.6 KiB
Plaintext

/*
* FFv1 codec
*
* Copyright (c) 2024 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
 * Compute the coordinate of slice boundary number sx along one image axis.
 *
 * width         - image extent along this axis
 * sx            - boundary index along this axis
 * num_h_slices  - number of slices along this axis
 * chroma_shift  - log2 of the granularity boundaries are snapped to
 *
 * Returns the boundary position along the axis.
 */
uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
{
    uint step = 1 << chroma_shift;
    uint padded = align(width, step);

    /* version below 4, or version 4 with micro_version below 3:
     * plain proportional split, no snapping to the step size */
    bool plain_split = (version < 4) || ((version == 4) && (micro_version < 3));
    if (plain_split)
        return width * sx / num_h_slices;

    /* padded * sx / (num_h_slices * step), rounded to nearest by adding
     * half of the divisor, then scaled back up to a multiple of step */
    uint numer = 2 * padded * sx + num_h_slices * step;
    uint denom = 2 * num_h_slices * step;
    uint pos = numer / denom * step;

    /* A boundary that lands on the padded extent is reported as the
     * real extent instead */
    return (pos == padded) ? width : pos;
}
/**
 * Fill in the slice context for the current workgroup.
 *
 * sc         - slice context to populate (position, dimensions, RCT
 *              coefficients, coding mode and range coder)
 * slice_idx  - linear slice index, used to locate this slice's region
 *              of out_data
 */
void init_slice(out SliceContext sc, const uint slice_idx)
{
    /* The workgroup ID selects the slice; the workgroup count is the
     * number of slices along each axis */
    uvec2 frame_dim = imageSize(src[0]);
    uvec2 slice_id = gl_WorkGroupID.xy;
    uvec2 grid = gl_NumWorkGroups.xy;

    uint left   = slice_coord(frame_dim.x, slice_id.x,     grid.x, chroma_shift.x);
    uint right  = slice_coord(frame_dim.x, slice_id.x + 1, grid.x, chroma_shift.x);
    uint top    = slice_coord(frame_dim.y, slice_id.y,     grid.y, chroma_shift.y);
    uint bottom = slice_coord(frame_dim.y, slice_id.y + 1, grid.y, chroma_shift.y);

    sc.slice_pos = ivec2(left, top);
    sc.slice_dim = ivec2(right - left, bottom - top);

    sc.slice_rct_coef = ivec2(1, 1);
    /* 1 when force_pcm is set to 1, otherwise 0 */
    sc.slice_coding_mode = int(force_pcm == 1);

    /* The range coder gets the slice_size_max-sized region of out_data
     * at offset slice_idx * slice_size_max */
    rac_init(sc.c,
             OFFBUF(u8buf, out_data, slice_idx * slice_size_max),
             slice_size_max);
}
/**
 * Encode one bit with the range coder, renormalizing afterwards if needed.
 *
 * c      - range coder to write through
 * state  - address of the state byte used for this bit
 * bit    - value to encode
 */
void put_rac_full(inout RangeCoder c, uint64_t state, bool bit)
{
    put_rac_norenorm(c, state, bit);

    /* Nothing more to do while the range is still at least 0x100 */
    if (c.range >= 0x100)
        return;

    renorm_encoder_full(c);
}
/**
 * Encode an unsigned integer with the range coder.
 *
 * Layout of the emitted bits:
 *   - one "is zero" flag using state[0];
 *   - for non-zero values, the index of the highest set bit as a run of
 *     ones closed by a zero, using state[1 + min(i, 9)];
 *   - the bits below the highest set bit, most significant first, using
 *     state[22 + min(i, 9)].
 *
 * c      - range coder to write through
 * state  - base address of the state bytes
 * v      - value to encode
 */
void put_symbol_unsigned(inout RangeCoder c, uint64_t state, uint v)
{
    if (v == 0) {
        put_rac_full(c, state, true);
        return;
    }
    put_rac_full(c, state, false);

    const int msb = findMSB(v);
    const uint64_t len_state = state + 1;
    const uint64_t bits_state = state + 22;

    /* Unary-coded position of the highest set bit */
    for (int k = 0; k < msb; k++)
        put_rac_full(c, len_state + min(k, 9), true);
    put_rac_full(c, len_state + min(msb, 9), false);

    /* Remaining bits; the top bit is implied and therefore skipped */
    for (int k = msb - 1; k >= 0; k--)
        put_rac_full(c, bits_state + min(k, 9), bool(bitfieldExtract(v, k, 1)));
}
/**
 * Write the slice header for the current workgroup's slice.
 *
 * sc     - slice context whose range coder receives the header
 * state  - base address of CONTEXT_SIZE state bytes; they are reset to
 *          128 here and shared by every symbol of the header
 */
void write_slice_header(inout SliceContext sc, uint64_t state)
{
    u8buf ctx_bytes = u8buf(state);

    [[unroll]]
    for (int n = 0; n < CONTEXT_SIZE; n++)
        ctx_bytes[n].v = uint8_t(128);

    /* Slice position in the slice grid */
    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.x);
    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.y);

    /* Two fields that are always written as zero.
     * NOTE(review): presumably the FFV1 slice width/height minus one -
     * confirm against the bitstream specification */
    put_symbol_unsigned(sc.c, state, 0);
    put_symbol_unsigned(sc.c, state, 0);

    /* The same context_model value is written once per coded plane */
    for (int plane = 0; plane < codec_planes; plane++)
        put_symbol_unsigned(sc.c, state, context_model);

    put_symbol_unsigned(sc.c, state, pic_mode);
    put_symbol_unsigned(sc.c, state, sar.x);
    put_symbol_unsigned(sc.c, state, sar.y);

    if (version >= 4) {
        const bool mode_is_one = (sc.slice_coding_mode == 1);

        put_rac_full(sc.c, state, mode_is_one);
        put_symbol_unsigned(sc.c, state, sc.slice_coding_mode);

        /* RCT coefficients are only written when the coding mode is
         * not 1 and colorspace is 1; y goes first, then x */
        if (!mode_is_one && colorspace == 1) {
            put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y);
            put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.x);
        }
    }
}
/**
 * Write the frame header: a single bit carrying key_frame.
 *
 * sc     - slice context whose range coder receives the bit
 * state  - address of the state byte used for the bit; it is reset to
 *          128 before encoding
 */
void write_frame_header(inout SliceContext sc, uint64_t state)
{
    u8buf key_state = u8buf(state);
    key_state.v = uint8_t(128);

    put_rac_full(sc.c, state, bool(key_frame));
}
#ifdef GOLOMB
/**
 * Switch the slice from the range coder to the bit writer.
 * Only compiled when GOLOMB is defined.
 *
 * sc - slice context; hdr_len and pb are written, c is passed to
 *      rac_terminate()
 */
void init_golomb(inout SliceContext sc)
{
/* The value returned by rac_terminate() is kept as the header length */
sc.hdr_len = rac_terminate(sc.c);
/* The bit writer is given what remains of the slice_size_max budget.
 * NOTE(review): OFFBUF presumably yields bytestream_start advanced by
 * hdr_len bytes - confirm against the macro definition */
init_put_bits(sc.pb,
OFFBUF(u8buf, sc.c.bytestream_start, sc.hdr_len),
slice_size_max - sc.hdr_len);
}
#endif
/**
 * Shader entry point: set up the slice handled by this workgroup and
 * write its headers.
 *
 * Steps: initialize the slice context, write the frame header (slice 0
 * only), write the slice header, and, when GOLOMB is defined, run
 * init_golomb() on the slice.
 */
void main(void)
{
    /* Row-major linear index of this workgroup in the workgroup grid */
    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;

    /* Each slice uses its own CONTEXT_SIZE-byte window of scratch_data
     * for the header coder states */
    uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE;

    init_slice(slice_ctx[slice_idx], slice_idx);

    /* The frame header is written once, at the start of the first slice */
    if (slice_idx == 0)
        write_frame_header(slice_ctx[slice_idx], scratch_state);

    write_slice_header(slice_ctx[slice_idx], scratch_state);

#ifdef GOLOMB
    init_golomb(slice_ctx[slice_idx]);
#endif
}