/* * Copyright (C) 2019 Philip Langdale * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ __device__ static const int coef_lf[2] = { 4309, 213 }; __device__ static const int coef_hf[3] = { 5570, 3801, 1016 }; __device__ static const int coef_sp[2] = { 5077, 981 }; template __inline__ __device__ T max3(T a, T b, T c) { T x = max(a, b); return max(x, c); } template __inline__ __device__ T min3(T a, T b, T c) { T x = min(a, b); return min(x, c); } template __inline__ __device__ T clip(T a, T min, T max) { if (a < min) { return min; } else if (a > max) { return max; } else { return a; } } template __inline__ __device__ T filter_intra(T cur_prefs3, T cur_prefs, T cur_mrefs, T cur_mrefs3, int clip_max) { int final = (coef_sp[0] * (cur_mrefs + cur_prefs) - coef_sp[1] * (cur_mrefs3 + cur_prefs3)) >> 13; return clip(final, 0, clip_max); } template __inline__ __device__ T filter(T cur_prefs3, T cur_prefs, T cur_mrefs, T cur_mrefs3, T prev2_prefs4, T prev2_prefs2, T prev2_0, T prev2_mrefs2, T prev2_mrefs4, T prev_prefs, T prev_mrefs, T next_prefs, T next_mrefs, T next2_prefs4, T next2_prefs2, T next2_0, T next2_mrefs2, T next2_mrefs4, int clip_max) { T final; int c = cur_mrefs; int d = (prev2_0 + next2_0) >> 1; int e = cur_prefs; int temporal_diff0 = abs(prev2_0 - next2_0); int temporal_diff1 = (abs(prev_mrefs - c) + abs(prev_prefs - e)) >> 1; int temporal_diff2 = (abs(next_mrefs - c) + abs(next_prefs - e)) >> 1; int diff = max3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); if (!diff) { final = d; } else { int b = ((prev2_mrefs2 + next2_mrefs2) >> 1) - c; int f = ((prev2_prefs2 + next2_prefs2) >> 1) - e; int dc = d - c; int de = d - e; int mmax = max3(de, dc, min(b, f)); int mmin = min3(de, dc, max(b, f)); diff = max3(diff, mmin, -mmax); int interpol; if (abs(c - e) > temporal_diff0) { interpol = (((coef_hf[0] * (prev2_0 + next2_0) - coef_hf[1] * (prev2_mrefs2 + next2_mrefs2 + prev2_prefs2 + next2_prefs2) + coef_hf[2] * (prev2_mrefs4 + next2_mrefs4 + prev2_prefs4 + next2_mrefs4)) >> 2) + coef_lf[0] * (c + e) - coef_lf[1] * (cur_mrefs3 + cur_prefs3)) >> 13; } else { interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur_mrefs3 + cur_prefs3)) >> 13; } if (interpol > d + diff) { interpol = d + diff; } else if (interpol < d - diff) { interpol = d - diff; } final = clip(interpol, 0, clip_max); } return final; } template __inline__ __device__ void bwdif_single(T *dst, cudaTextureObject_t prev, cudaTextureObject_t cur, cudaTextureObject_t next, int dst_width, int dst_height, int dst_pitch, int src_width, int src_height, int parity, int tff, int is_field_end, int clip_max) { // Identify location int xo = blockIdx.x * blockDim.x + threadIdx.x; int yo = blockIdx.y * blockDim.y + threadIdx.y; if (xo >= dst_width || yo >= dst_height) { return; } // Don't modify the primary field if (yo % 2 == parity) { dst[yo*dst_pitch+xo] = tex2D(cur, xo, yo); return; } T cur_prefs3 = tex2D(cur, xo, yo + 3); T cur_prefs = tex2D(cur, xo, yo + 1); T cur_mrefs = tex2D(cur, xo, yo - 1); T cur_mrefs3 = tex2D(cur, xo, yo - 3); if (is_field_end) { dst[yo*dst_pitch+xo] = filter_intra(cur_prefs3, cur_prefs, cur_mrefs, cur_mrefs3, clip_max); return; } // Calculate temporal prediction int is_second_field = !(parity ^ tff); cudaTextureObject_t prev2 = prev; cudaTextureObject_t prev1 = is_second_field ? cur : prev; cudaTextureObject_t next1 = is_second_field ? next : cur; cudaTextureObject_t next2 = next; T prev2_prefs4 = tex2D(prev2, xo, yo + 4); T prev2_prefs2 = tex2D(prev2, xo, yo + 2); T prev2_0 = tex2D(prev2, xo, yo + 0); T prev2_mrefs2 = tex2D(prev2, xo, yo - 2); T prev2_mrefs4 = tex2D(prev2, xo, yo - 4); T prev_prefs = tex2D(prev1, xo, yo + 1); T prev_mrefs = tex2D(prev1, xo, yo - 1); T next_prefs = tex2D(next1, xo, yo + 1); T next_mrefs = tex2D(next1, xo, yo - 1); T next2_prefs4 = tex2D(next2, xo, yo + 4); T next2_prefs2 = tex2D(next2, xo, yo + 2); T next2_0 = tex2D(next2, xo, yo + 0); T next2_mrefs2 = tex2D(next2, xo, yo - 2); T next2_mrefs4 = tex2D(next2, xo, yo - 4); dst[yo*dst_pitch+xo] = filter(cur_prefs3, cur_prefs, cur_mrefs, cur_mrefs3, prev2_prefs4, prev2_prefs2, prev2_0, prev2_mrefs2, prev2_mrefs4, prev_prefs, prev_mrefs, next_prefs, next_mrefs, next2_prefs4, next2_prefs2, next2_0, next2_mrefs2, next2_mrefs4, clip_max); } template __inline__ __device__ void bwdif_double(T *dst, cudaTextureObject_t prev, cudaTextureObject_t cur, cudaTextureObject_t next, int dst_width, int dst_height, int dst_pitch, int src_width, int src_height, int parity, int tff, int is_field_end, int clip_max) { int xo = blockIdx.x * blockDim.x + threadIdx.x; int yo = blockIdx.y * blockDim.y + threadIdx.y; if (xo >= dst_width || yo >= dst_height) { return; } if (yo % 2 == parity) { // Don't modify the primary field dst[yo*dst_pitch+xo] = tex2D(cur, xo, yo); return; } T cur_prefs3 = tex2D(cur, xo, yo + 3); T cur_prefs = tex2D(cur, xo, yo + 1); T cur_mrefs = tex2D(cur, xo, yo - 1); T cur_mrefs3 = tex2D(cur, xo, yo - 3); if (is_field_end) { T final; final.x = filter_intra(cur_prefs3.x, cur_prefs.x, cur_mrefs.x, cur_mrefs3.x, clip_max); final.y = filter_intra(cur_prefs3.y, cur_prefs.y, cur_mrefs.y, cur_mrefs3.y, clip_max); dst[yo*dst_pitch+xo] = final; return; } int is_second_field = !(parity ^ tff); cudaTextureObject_t prev2 = prev; cudaTextureObject_t prev1 = is_second_field ? cur : prev; cudaTextureObject_t next1 = is_second_field ? next : cur; cudaTextureObject_t next2 = next; T prev2_prefs4 = tex2D(prev2, xo, yo + 4); T prev2_prefs2 = tex2D(prev2, xo, yo + 2); T prev2_0 = tex2D(prev2, xo, yo + 0); T prev2_mrefs2 = tex2D(prev2, xo, yo - 2); T prev2_mrefs4 = tex2D(prev2, xo, yo - 4); T prev_prefs = tex2D(prev1, xo, yo + 1); T prev_mrefs = tex2D(prev1, xo, yo - 1); T next_prefs = tex2D(next1, xo, yo + 1); T next_mrefs = tex2D(next1, xo, yo - 1); T next2_prefs4 = tex2D(next2, xo, yo + 4); T next2_prefs2 = tex2D(next2, xo, yo + 2); T next2_0 = tex2D(next2, xo, yo + 0); T next2_mrefs2 = tex2D(next2, xo, yo - 2); T next2_mrefs4 = tex2D(next2, xo, yo - 4); T final; final.x = filter(cur_prefs3.x, cur_prefs.x, cur_mrefs.x, cur_mrefs3.x, prev2_prefs4.x, prev2_prefs2.x, prev2_0.x, prev2_mrefs2.x, prev2_mrefs4.x, prev_prefs.x, prev_mrefs.x, next_prefs.x, next_mrefs.x, next2_prefs4.x, next2_prefs2.x, next2_0.x, next2_mrefs2.x, next2_mrefs4.x, clip_max); final.y = filter(cur_prefs3.y, cur_prefs.y, cur_mrefs.y, cur_mrefs3.y, prev2_prefs4.y, prev2_prefs2.y, prev2_0.y, prev2_mrefs2.y, prev2_mrefs4.y, prev_prefs.y, prev_mrefs.y, next_prefs.y, next_mrefs.y, next2_prefs4.y, next2_prefs2.y, next2_0.y, next2_mrefs2.y, next2_mrefs4.y, clip_max); dst[yo*dst_pitch+xo] = final; } extern "C" { __global__ void bwdif_uchar(unsigned char *dst, cudaTextureObject_t prev, cudaTextureObject_t cur, cudaTextureObject_t next, int dst_width, int dst_height, int dst_pitch, int src_width, int src_height, int parity, int tff, int is_field_end, int clip_max) { bwdif_single(dst, prev, cur, next, dst_width, dst_height, dst_pitch, src_width, src_height, parity, tff, is_field_end, clip_max); } __global__ void bwdif_ushort(unsigned short *dst, cudaTextureObject_t prev, cudaTextureObject_t cur, cudaTextureObject_t next, int dst_width, int dst_height, int dst_pitch, int src_width, int src_height, int parity, int tff, int is_field_end, int clip_max) { bwdif_single(dst, prev, cur, next, dst_width, dst_height, dst_pitch, src_width, src_height, parity, tff, is_field_end, clip_max); } __global__ void bwdif_uchar2(uchar2 *dst, cudaTextureObject_t prev, cudaTextureObject_t cur, cudaTextureObject_t next, int dst_width, int dst_height, int dst_pitch, int src_width, int src_height, int parity, int tff, int is_field_end, int clip_max) { bwdif_double(dst, prev, cur, next, dst_width, dst_height, dst_pitch, src_width, src_height, parity, tff, is_field_end, clip_max); } __global__ void bwdif_ushort2(ushort2 *dst, cudaTextureObject_t prev, cudaTextureObject_t cur, cudaTextureObject_t next, int dst_width, int dst_height, int dst_pitch, int src_width, int src_height, int parity, int tff, int is_field_end, int clip_max) { bwdif_double(dst, prev, cur, next, dst_width, dst_height, dst_pitch, src_width, src_height, parity, tff, is_field_end, clip_max); } } /* extern "C" */