mirror of https://git.ffmpeg.org/ffmpeg.git
postproc: Made QP, nonBQP, and pQPb arrays
Also pulled QP initialization out of inner loop, which removed some redundant code. Added some dummy fields to PPContext to allow current code to work while changing the rest of the postprocessing code to support the arrays. I also increased the alignment requirements for some fields in the PPContext struct to support future AVX2 code. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
6264b6227c
commit
cbe27006ce
|
@ -143,8 +143,11 @@ typedef struct PPContext{
|
|||
DECLARE_ALIGNED(8, uint64_t, pQPb);
|
||||
DECLARE_ALIGNED(8, uint64_t, pQPb2);
|
||||
|
||||
DECLARE_ALIGNED(8, uint64_t, mmxDcOffset)[64];
|
||||
DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold)[64];
|
||||
DECLARE_ALIGNED(32, uint64_t, pQPb_block)[4];
|
||||
DECLARE_ALIGNED(32, uint64_t, pQPb2_block)[4];
|
||||
|
||||
DECLARE_ALIGNED(32, uint64_t, mmxDcOffset)[64];
|
||||
DECLARE_ALIGNED(32, uint64_t, mmxDcThreshold)[64];
|
||||
|
||||
QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale
|
||||
QP_STORE_T *nonBQPTable;
|
||||
|
@ -153,6 +156,9 @@ typedef struct PPContext{
|
|||
int QP;
|
||||
int nonBQP;
|
||||
|
||||
DECLARE_ALIGNED(32, int, QP_block)[4];
|
||||
DECLARE_ALIGNED(32, int, nonBQP_block)[4];
|
||||
|
||||
int frameNum;
|
||||
|
||||
int cpuCaps;
|
||||
|
|
|
@ -3479,7 +3479,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
|||
#endif
|
||||
const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
|
||||
int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
|
||||
int QP=0;
|
||||
int QP=0, nonBQP=0;
|
||||
/* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
|
||||
if not than use a temporary buffer */
|
||||
if(y+15 >= height){
|
||||
|
@ -3512,6 +3512,29 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
|||
int endx = FFMIN(width, x+32);
|
||||
uint8_t *dstBlockStart = dstBlock;
|
||||
const uint8_t *srcBlockStart = srcBlock;
|
||||
int qp_index = 0;
|
||||
for(qp_index=0; qp_index < (endx-startx)/BLOCK_SIZE; qp_index++){
|
||||
QP = QPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
|
||||
nonBQP = nonBQPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
|
||||
if(!isColor){
|
||||
QP= (QP* QPCorrecture + 256*128)>>16;
|
||||
nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
|
||||
yHistogram[(srcBlock+qp_index*8)[srcStride*12 + 4]]++;
|
||||
}
|
||||
c.QP_block[qp_index] = QP;
|
||||
c.nonBQP_block[qp_index] = nonBQP;
|
||||
#if TEMPLATE_PP_MMX
|
||||
__asm__ volatile(
|
||||
"movd %1, %%mm7 \n\t"
|
||||
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
|
||||
"packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
|
||||
"packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
|
||||
"movq %%mm7, %0 \n\t"
|
||||
: "=m" (c.pQPb_block[qp_index])
|
||||
: "r" (QP)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
for(; x < endx; x+=BLOCK_SIZE){
|
||||
RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
|
||||
RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
|
||||
|
@ -3543,27 +3566,15 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
|||
dstBlock = dstBlockStart;
|
||||
srcBlock = srcBlockStart;
|
||||
|
||||
for(x = startx; x < endx; x+=BLOCK_SIZE){
|
||||
for(x = startx, qp_index = 0; x < endx; x+=BLOCK_SIZE, qp_index++){
|
||||
const int stride= dstStride;
|
||||
QP = QPptr[x>>qpHShift];
|
||||
c.nonBQP = nonBQPptr[x>>qpHShift];
|
||||
if(!isColor){
|
||||
QP= (QP* QPCorrecture + 256*128)>>16;
|
||||
c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
|
||||
yHistogram[srcBlock[srcStride*12 + 4]]++;
|
||||
}
|
||||
c.QP= QP;
|
||||
#if TEMPLATE_PP_MMX
|
||||
__asm__ volatile(
|
||||
"movd %1, %%mm7 \n\t"
|
||||
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
|
||||
"packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
|
||||
"packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
|
||||
"movq %%mm7, %0 \n\t"
|
||||
: "=m" (c.pQPb)
|
||||
: "r" (QP)
|
||||
);
|
||||
#endif
|
||||
//temporary while changing QP stuff to make things continue to work
|
||||
//eventually QP,nonBQP,etc will be arrays and this will be unnecessary
|
||||
c.QP = c.QP_block[qp_index];
|
||||
c.nonBQP = c.nonBQP_block[qp_index];
|
||||
c.pQPb = c.pQPb_block[qp_index];
|
||||
c.pQPb2 = c.pQPb2_block[qp_index];
|
||||
|
||||
/* only deblock if we have 2 blocks */
|
||||
if(y + 8 < height){
|
||||
if(mode & V_X1_FILTER)
|
||||
|
@ -3587,30 +3598,14 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
|||
dstBlock = dstBlockStart;
|
||||
srcBlock = srcBlockStart;
|
||||
|
||||
for(x = startx; x < endx; x+=BLOCK_SIZE){
|
||||
for(x = startx, qp_index=0; x < endx; x+=BLOCK_SIZE, qp_index++){
|
||||
const int stride= dstStride;
|
||||
av_unused uint8_t *tmpXchg;
|
||||
|
||||
if(isColor){
|
||||
QP= QPptr[x>>qpHShift];
|
||||
c.nonBQP= nonBQPptr[x>>qpHShift];
|
||||
}else{
|
||||
QP= QPptr[x>>4];
|
||||
QP= (QP* QPCorrecture + 256*128)>>16;
|
||||
c.nonBQP= nonBQPptr[x>>4];
|
||||
c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
|
||||
}
|
||||
c.QP= QP;
|
||||
c.QP = c.QP_block[qp_index];
|
||||
c.nonBQP = c.nonBQP_block[qp_index];
|
||||
c.pQPb = c.pQPb_block[qp_index];
|
||||
c.pQPb2 = c.pQPb2_block[qp_index];
|
||||
#if TEMPLATE_PP_MMX
|
||||
__asm__ volatile(
|
||||
"movd %1, %%mm7 \n\t"
|
||||
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
|
||||
"packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
|
||||
"packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
|
||||
"movq %%mm7, %0 \n\t"
|
||||
: "=m" (c.pQPb)
|
||||
: "r" (QP)
|
||||
);
|
||||
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
|
||||
#endif
|
||||
/* check if we have a previous block to deblock it with dstBlock */
|
||||
|
@ -3632,7 +3627,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
|||
|
||||
#else
|
||||
if(mode & H_X1_FILTER)
|
||||
horizX1Filter(dstBlock-4, stride, QP);
|
||||
horizX1Filter(dstBlock-4, stride, c.QP);
|
||||
else if(mode & H_DEBLOCK){
|
||||
#if TEMPLATE_PP_ALTIVEC
|
||||
DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
|
||||
|
|
Loading…
Reference in New Issue