mirror of https://git.ffmpeg.org/ffmpeg.git
310 lines
12 KiB
Plaintext
310 lines
12 KiB
Plaintext
/*
 * Copyright (C) 2019 Philip Langdale <philipl@overt.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
__device__ static const int coef_lf[2] = { 4309, 213 };
|
|
__device__ static const int coef_hf[3] = { 5570, 3801, 1016 };
|
|
__device__ static const int coef_sp[2] = { 5077, 981 };
|
|
|
|
template<typename T>
|
|
__inline__ __device__ T max3(T a, T b, T c)
|
|
{
|
|
T x = max(a, b);
|
|
return max(x, c);
|
|
}
|
|
|
|
/* Return the smallest of the three arguments. */
template<typename T>
__inline__ __device__ T min3(T a, T b, T c)
{
    // Reduce the first pair, then compare the winner against the third value.
    return min(static_cast<T>(min(a, b)), c);
}
|
|
|
|
/*
 * Clamp a to the closed range [min, max].
 *
 * The lower bound is tested first, so if min > max and a is below min the
 * result is min. The parameter names shadow the min()/max() functions inside
 * this body only.
 */
template<typename T>
__inline__ __device__ T clip(T a, T min, T max)
{
    return (a < min) ? min
         : (a > max) ? max
         : a;
}
|
|
|
|
/*
 * Spatial-only interpolation of one sample from four samples of the current
 * frame.
 *
 * cur_mrefs/cur_prefs are weighted by coef_sp[0] and cur_mrefs3/cur_prefs3 by
 * -coef_sp[1]; the sum is scaled back by >> 13 and clamped to [0, clip_max].
 * The callers in this file pass the samples at y - 1, y + 1, y - 3 and y + 3.
 */
template<typename T>
__inline__ __device__ T filter_intra(T cur_prefs3, T cur_prefs,
                                     T cur_mrefs, T cur_mrefs3,
                                     int clip_max)
{
    // The sample types used in this file (unsigned char / unsigned short)
    // promote to int, so the sums are carried as int.
    int inner_sum = cur_mrefs + cur_prefs;
    int outer_sum = cur_mrefs3 + cur_prefs3;

    int interpolated = (coef_sp[0] * inner_sum - coef_sp[1] * outer_sum) >> 13;

    return clip(interpolated, 0, clip_max);
}
|
|
|
|
/*
 * Interpolate one missing sample from the current frame and its temporal
 * neighbours.
 *
 * Naming: "mrefsN" / "prefsN" are the samples N lines above / below the output
 * position (the callers in this file fetch them at y - N / y + N), and "_0" is
 * the sample at the output position itself.
 *
 *   cur_*    samples of the current frame at +-1 and +-3
 *   prev2_*  samples of the frame passed as "prev" at 0, +-2, +-4
 *   next2_*  samples of the frame passed as "next" at 0, +-2, +-4
 *   prev_*   samples at +-1 from the frame the caller selected as prev1
 *   next_*   samples at +-1 from the frame the caller selected as next1
 *   clip_max upper bound for the interpolated result
 *
 * Returns the temporal average of prev2_0/next2_0 when no temporal difference
 * is detected; otherwise a filtered value limited to
 * [average - diff, average + diff] and then clamped to [0, clip_max].
 */
template<typename T>
__inline__ __device__ T filter(T cur_prefs3, T cur_prefs, T cur_mrefs, T cur_mrefs3,
                               T prev2_prefs4, T prev2_prefs2, T prev2_0, T prev2_mrefs2, T prev2_mrefs4,
                               T prev_prefs, T prev_mrefs, T next_prefs, T next_mrefs,
                               T next2_prefs4, T next2_prefs2, T next2_0, T next2_mrefs2, T next2_mrefs4,
                               int clip_max)
{
    T final;

    int c = cur_mrefs;                  // line above, current frame
    int d = (prev2_0 + next2_0) >> 1;   // temporal average at the output position
    int e = cur_prefs;                  // line below, current frame

    // Largest of three temporal difference estimates.
    int temporal_diff0 = abs(prev2_0 - next2_0);
    int temporal_diff1 = (abs(prev_mrefs - c) + abs(prev_prefs - e)) >> 1;
    int temporal_diff2 = (abs(next_mrefs - c) + abs(next_prefs - e)) >> 1;
    int diff = max3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2);

    if (!diff) {
        // No temporal difference detected: use the temporal average as is.
        final = d;
    } else {
        // Widen the allowed deviation from d using the vertical neighbourhood.
        int b = ((prev2_mrefs2 + next2_mrefs2) >> 1) - c;
        int f = ((prev2_prefs2 + next2_prefs2) >> 1) - e;
        int dc = d - c;
        int de = d - e;
        int mmax = max3(de, dc, min(b, f));
        int mmin = min3(de, dc, max(b, f));
        diff = max3(diff, mmin, -mmax);

        int interpol;
        if (abs(c - e) > temporal_diff0) {
            // High-frequency term from prev2/next2 at 0, +-2 and +-4 (pre-scaled
            // by >> 2) plus the low-frequency term from the current frame.
            // The +-4 tap must cover all four outer samples: both frames, both
            // above (mrefs4) and below (prefs4).
            interpol = (((coef_hf[0] * (prev2_0 + next2_0)
                - coef_hf[1] * (prev2_mrefs2 + next2_mrefs2 + prev2_prefs2 + next2_prefs2)
                + coef_hf[2] * (prev2_mrefs4 + next2_mrefs4 + prev2_prefs4 + next2_prefs4)) >> 2)
                + coef_lf[0] * (c + e) - coef_lf[1] * (cur_mrefs3 + cur_prefs3)) >> 13;
        } else {
            // Spatial-only interpolation from the current frame.
            interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur_mrefs3 + cur_prefs3)) >> 13;
        }

        // Keep the result within +-diff of the temporal average.
        if (interpol > d + diff) {
            interpol = d + diff;
        } else if (interpol < d - diff) {
            interpol = d - diff;
        }

        final = clip(interpol, 0, clip_max);
    }

    return final;
}
|
|
|
|
/*
 * Produce one output sample of a plane whose texels are scalar values of
 * type T.
 *
 * Each thread derives its output coordinate from its 2D block/thread index
 * and returns immediately if that lies outside dst_width x dst_height.
 * Rows whose index modulo 2 equals parity are copied from cur; all other rows
 * are interpolated with filter_intra() when is_field_end is set, otherwise
 * with filter().
 *
 * dst is indexed as dst[y * dst_pitch + x], so dst_pitch is counted in
 * elements of T. src_width and src_height are accepted but not used here.
 * Rows up to 4 lines outside the frame are sampled near the top and bottom;
 * what those fetches return depends on how the textures were created, which
 * is not visible in this file.
 */
template<typename T>
__inline__ __device__ void bwdif_single(T *dst,
                                        cudaTextureObject_t prev,
                                        cudaTextureObject_t cur,
                                        cudaTextureObject_t next,
                                        int dst_width, int dst_height, int dst_pitch,
                                        int src_width, int src_height,
                                        int parity, int tff,
                                        int is_field_end, int clip_max)
{
    // Output coordinate handled by this thread
    const int xo = blockIdx.x * blockDim.x + threadIdx.x;
    const int yo = blockIdx.y * blockDim.y + threadIdx.y;

    if (xo >= dst_width || yo >= dst_height)
        return;

    T *out = &dst[yo * dst_pitch + xo];

    // Don't modify the primary field
    if (yo % 2 == parity) {
        *out = tex2D<T>(cur, xo, yo);
        return;
    }

    // Vertical neighbours in the current frame
    const T cur_prefs3 = tex2D<T>(cur, xo, yo + 3);
    const T cur_prefs  = tex2D<T>(cur, xo, yo + 1);
    const T cur_mrefs  = tex2D<T>(cur, xo, yo - 1);
    const T cur_mrefs3 = tex2D<T>(cur, xo, yo - 3);

    if (is_field_end) {
        // Spatial interpolation only
        *out = filter_intra(cur_prefs3, cur_prefs, cur_mrefs, cur_mrefs3, clip_max);
        return;
    }

    // Calculate temporal prediction: pick the frames that supply the +-1 rows.
    // parity == tff is the "second field" case.
    cudaTextureObject_t prev1;
    cudaTextureObject_t next1;
    if (parity == tff) {
        prev1 = cur;
        next1 = next;
    } else {
        prev1 = prev;
        next1 = cur;
    }

    // Rows at 0, +-2 and +-4 always come from prev and next.
    const T prev2_prefs4 = tex2D<T>(prev, xo, yo + 4);
    const T prev2_prefs2 = tex2D<T>(prev, xo, yo + 2);
    const T prev2_0      = tex2D<T>(prev, xo, yo + 0);
    const T prev2_mrefs2 = tex2D<T>(prev, xo, yo - 2);
    const T prev2_mrefs4 = tex2D<T>(prev, xo, yo - 4);

    const T prev_prefs   = tex2D<T>(prev1, xo, yo + 1);
    const T prev_mrefs   = tex2D<T>(prev1, xo, yo - 1);
    const T next_prefs   = tex2D<T>(next1, xo, yo + 1);
    const T next_mrefs   = tex2D<T>(next1, xo, yo - 1);

    const T next2_prefs4 = tex2D<T>(next, xo, yo + 4);
    const T next2_prefs2 = tex2D<T>(next, xo, yo + 2);
    const T next2_0      = tex2D<T>(next, xo, yo + 0);
    const T next2_mrefs2 = tex2D<T>(next, xo, yo - 2);
    const T next2_mrefs4 = tex2D<T>(next, xo, yo - 4);

    *out = filter(cur_prefs3, cur_prefs, cur_mrefs, cur_mrefs3,
                  prev2_prefs4, prev2_prefs2, prev2_0, prev2_mrefs2, prev2_mrefs4,
                  prev_prefs, prev_mrefs, next_prefs, next_mrefs,
                  next2_prefs4, next2_prefs2, next2_0, next2_mrefs2, next2_mrefs4,
                  clip_max);
}
|
|
|
|
/*
 * Produce one output texel of a plane whose texels are two-component vectors
 * of type T (members .x and .y).
 *
 * Each thread derives its output coordinate from its 2D block/thread index
 * and returns immediately if that lies outside dst_width x dst_height.
 * Rows whose index modulo 2 equals parity are copied from cur; all other rows
 * are interpolated per component, with filter_intra() when is_field_end is
 * set and with filter() otherwise.
 *
 * dst is indexed as dst[y * dst_pitch + x], so dst_pitch is counted in
 * elements of T. src_width and src_height are accepted but not used here.
 */
template <typename T>
__inline__ __device__ void bwdif_double(T *dst,
                                        cudaTextureObject_t prev,
                                        cudaTextureObject_t cur,
                                        cudaTextureObject_t next,
                                        int dst_width, int dst_height, int dst_pitch,
                                        int src_width, int src_height,
                                        int parity, int tff,
                                        int is_field_end, int clip_max)
{
    // Output coordinate handled by this thread
    const int xo = blockIdx.x * blockDim.x + threadIdx.x;
    const int yo = blockIdx.y * blockDim.y + threadIdx.y;

    if (xo >= dst_width || yo >= dst_height)
        return;

    T *out = &dst[yo * dst_pitch + xo];

    // Don't modify the primary field
    if (yo % 2 == parity) {
        *out = tex2D<T>(cur, xo, yo);
        return;
    }

    // Vertical neighbours in the current frame
    const T cur_prefs3 = tex2D<T>(cur, xo, yo + 3);
    const T cur_prefs  = tex2D<T>(cur, xo, yo + 1);
    const T cur_mrefs  = tex2D<T>(cur, xo, yo - 1);
    const T cur_mrefs3 = tex2D<T>(cur, xo, yo - 3);

    T result;

    if (is_field_end) {
        // Spatial interpolation only, one call per component
        result.x = filter_intra(cur_prefs3.x, cur_prefs.x, cur_mrefs.x, cur_mrefs3.x,
                                clip_max);
        result.y = filter_intra(cur_prefs3.y, cur_prefs.y, cur_mrefs.y, cur_mrefs3.y,
                                clip_max);
        *out = result;
        return;
    }

    // Pick the frames that supply the +-1 rows of the temporal prediction.
    // parity == tff is the "second field" case.
    cudaTextureObject_t prev1;
    cudaTextureObject_t next1;
    if (parity == tff) {
        prev1 = cur;
        next1 = next;
    } else {
        prev1 = prev;
        next1 = cur;
    }

    // Rows at 0, +-2 and +-4 always come from prev and next.
    const T prev2_prefs4 = tex2D<T>(prev, xo, yo + 4);
    const T prev2_prefs2 = tex2D<T>(prev, xo, yo + 2);
    const T prev2_0      = tex2D<T>(prev, xo, yo + 0);
    const T prev2_mrefs2 = tex2D<T>(prev, xo, yo - 2);
    const T prev2_mrefs4 = tex2D<T>(prev, xo, yo - 4);

    const T prev_prefs   = tex2D<T>(prev1, xo, yo + 1);
    const T prev_mrefs   = tex2D<T>(prev1, xo, yo - 1);
    const T next_prefs   = tex2D<T>(next1, xo, yo + 1);
    const T next_mrefs   = tex2D<T>(next1, xo, yo - 1);

    const T next2_prefs4 = tex2D<T>(next, xo, yo + 4);
    const T next2_prefs2 = tex2D<T>(next, xo, yo + 2);
    const T next2_0      = tex2D<T>(next, xo, yo + 0);
    const T next2_mrefs2 = tex2D<T>(next, xo, yo - 2);
    const T next2_mrefs4 = tex2D<T>(next, xo, yo - 4);

    // First component
    result.x = filter(cur_prefs3.x, cur_prefs.x, cur_mrefs.x, cur_mrefs3.x,
                      prev2_prefs4.x, prev2_prefs2.x, prev2_0.x, prev2_mrefs2.x, prev2_mrefs4.x,
                      prev_prefs.x, prev_mrefs.x, next_prefs.x, next_mrefs.x,
                      next2_prefs4.x, next2_prefs2.x, next2_0.x, next2_mrefs2.x, next2_mrefs4.x,
                      clip_max);

    // Second component
    result.y = filter(cur_prefs3.y, cur_prefs.y, cur_mrefs.y, cur_mrefs3.y,
                      prev2_prefs4.y, prev2_prefs2.y, prev2_0.y, prev2_mrefs2.y, prev2_mrefs4.y,
                      prev_prefs.y, prev_mrefs.y, next_prefs.y, next_mrefs.y,
                      next2_prefs4.y, next2_prefs2.y, next2_0.y, next2_mrefs2.y, next2_mrefs4.y,
                      clip_max);

    *out = result;
}
|
|
|
|
extern "C" {
|
|
|
|
/*
 * Kernel entry point for planes of unsigned char samples.
 *
 * Forwards every argument unchanged to bwdif_single<unsigned char>, which
 * maps one thread of a 2D launch to one output sample and ignores threads
 * outside dst_width x dst_height.
 */
__global__ void bwdif_uchar(unsigned char *dst,
                            cudaTextureObject_t prev,
                            cudaTextureObject_t cur,
                            cudaTextureObject_t next,
                            int dst_width, int dst_height, int dst_pitch,
                            int src_width, int src_height,
                            int parity, int tff, int is_field_end, int clip_max)
{
    bwdif_single<unsigned char>(dst,
                                prev, cur, next,
                                dst_width, dst_height, dst_pitch,
                                src_width, src_height,
                                parity, tff,
                                is_field_end, clip_max);
}
|
|
|
|
/*
 * Kernel entry point for planes of unsigned short samples.
 *
 * Forwards every argument unchanged to bwdif_single<unsigned short>, which
 * maps one thread of a 2D launch to one output sample and ignores threads
 * outside dst_width x dst_height.
 */
__global__ void bwdif_ushort(unsigned short *dst,
                             cudaTextureObject_t prev,
                             cudaTextureObject_t cur,
                             cudaTextureObject_t next,
                             int dst_width, int dst_height, int dst_pitch,
                             int src_width, int src_height,
                             int parity, int tff, int is_field_end, int clip_max)
{
    bwdif_single<unsigned short>(dst,
                                 prev, cur, next,
                                 dst_width, dst_height, dst_pitch,
                                 src_width, src_height,
                                 parity, tff,
                                 is_field_end, clip_max);
}
|
|
|
|
/*
 * Kernel entry point for planes of uchar2 (two-component) texels.
 *
 * Forwards every argument unchanged to bwdif_double<uchar2>, which maps one
 * thread of a 2D launch to one output texel and ignores threads outside
 * dst_width x dst_height.
 */
__global__ void bwdif_uchar2(uchar2 *dst,
                             cudaTextureObject_t prev,
                             cudaTextureObject_t cur,
                             cudaTextureObject_t next,
                             int dst_width, int dst_height, int dst_pitch,
                             int src_width, int src_height,
                             int parity, int tff, int is_field_end, int clip_max)
{
    bwdif_double<uchar2>(dst,
                         prev, cur, next,
                         dst_width, dst_height, dst_pitch,
                         src_width, src_height,
                         parity, tff,
                         is_field_end, clip_max);
}
|
|
|
|
/*
 * Kernel entry point for planes of ushort2 (two-component) texels.
 *
 * Forwards every argument unchanged to bwdif_double<ushort2>, which maps one
 * thread of a 2D launch to one output texel and ignores threads outside
 * dst_width x dst_height.
 */
__global__ void bwdif_ushort2(ushort2 *dst,
                              cudaTextureObject_t prev,
                              cudaTextureObject_t cur,
                              cudaTextureObject_t next,
                              int dst_width, int dst_height, int dst_pitch,
                              int src_width, int src_height,
                              int parity, int tff, int is_field_end, int clip_max)
{
    bwdif_double<ushort2>(dst,
                          prev, cur, next,
                          dst_width, dst_height, dst_pitch,
                          src_width, src_height,
                          parity, tff,
                          is_field_end, clip_max);
}
|
|
|
|
} /* extern "C" */
|