mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-21 23:03:13 +00:00
a18eff49c0
about 110 cpu cycles before 60 cpu cycles afterwards. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
185 lines
6.8 KiB
C
185 lines
6.8 KiB
C
/*
|
|
* Copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
/**
|
|
* @file
|
|
* miscellaneous math routines and tables
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stdint.h>
|
|
#include <limits.h>
|
|
#include "mathematics.h"
|
|
#include "libavutil/common.h"
|
|
|
|
const uint8_t ff_sqrt_tab[256]={
|
|
0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90,
|
|
91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
|
|
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156,
|
|
157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181,
|
|
182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202,
|
|
203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222,
|
|
222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239,
|
|
240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255
|
|
};
|
|
|
|
const uint8_t ff_log2_tab[256]={
|
|
0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
|
|
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
|
|
};
|
|
|
|
const uint8_t av_reverse[256]={
|
|
0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0,
|
|
0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8,
|
|
0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4,
|
|
0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC,
|
|
0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2,
|
|
0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA,
|
|
0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6,
|
|
0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE,
|
|
0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1,
|
|
0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9,
|
|
0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5,
|
|
0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD,
|
|
0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3,
|
|
0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB,
|
|
0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7,
|
|
0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF,
|
|
};
|
|
|
|
int64_t av_gcd(int64_t a, int64_t b){
|
|
if(b) return av_gcd(b, a%b);
|
|
else return a;
|
|
}
|
|
|
|
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd){
|
|
int64_t r=0;
|
|
assert(c > 0);
|
|
assert(b >=0);
|
|
assert((unsigned)rnd<=5 && rnd!=4);
|
|
|
|
if(a<0 && a != INT64_MIN) return -av_rescale_rnd(-a, b, c, rnd ^ ((rnd>>1)&1));
|
|
|
|
if(rnd==AV_ROUND_NEAR_INF) r= c/2;
|
|
else if(rnd&1) r= c-1;
|
|
|
|
if(b<=INT_MAX && c<=INT_MAX){
|
|
if(a<=INT_MAX)
|
|
return (a * b + r)/c;
|
|
else
|
|
return a/c*b + (a%c*b + r)/c;
|
|
}else{
|
|
#if 1
|
|
uint64_t a0= a&0xFFFFFFFF;
|
|
uint64_t a1= a>>32;
|
|
uint64_t b0= b&0xFFFFFFFF;
|
|
uint64_t b1= b>>32;
|
|
uint64_t t1= a0*b1 + a1*b0;
|
|
uint64_t t1a= t1<<32;
|
|
int i;
|
|
|
|
a0 = a0*b0 + t1a;
|
|
a1 = a1*b1 + (t1>>32) + (a0<t1a);
|
|
a0 += r;
|
|
a1 += a0<r;
|
|
|
|
for(i=63; i>=0; i--){
|
|
// int o= a1 & 0x8000000000000000ULL;
|
|
a1+= a1 + ((a0>>i)&1);
|
|
t1+=t1;
|
|
if(/*o || */c <= a1){
|
|
a1 -= c;
|
|
t1++;
|
|
}
|
|
}
|
|
return t1;
|
|
}
|
|
#else
|
|
AVInteger ai;
|
|
ai= av_mul_i(av_int2i(a), av_int2i(b));
|
|
ai= av_add_i(ai, av_int2i(r));
|
|
|
|
return av_i2int(av_div_i(ai, av_int2i(c)));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
int64_t av_rescale(int64_t a, int64_t b, int64_t c){
|
|
return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
|
|
}
|
|
|
|
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq){
|
|
int64_t b= bq.num * (int64_t)cq.den;
|
|
int64_t c= cq.num * (int64_t)bq.den;
|
|
return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
|
|
}
|
|
|
|
int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b){
|
|
int64_t a= tb_a.num * (int64_t)tb_b.den;
|
|
int64_t b= tb_b.num * (int64_t)tb_a.den;
|
|
if((FFABS(ts_a)|a|FFABS(ts_b)|b)<=INT_MAX)
|
|
return (ts_a*a > ts_b*b) - (ts_a*a < ts_b*b);
|
|
if (av_rescale_rnd(ts_a, a, b, AV_ROUND_DOWN) < ts_b) return -1;
|
|
if (av_rescale_rnd(ts_b, b, a, AV_ROUND_DOWN) < ts_a) return 1;
|
|
return 0;
|
|
}
|
|
|
|
int64_t av_compare_mod(uint64_t a, uint64_t b, uint64_t mod){
|
|
int64_t c= (a-b) & (mod-1);
|
|
if(c > (mod>>1))
|
|
c-= mod;
|
|
return c;
|
|
}
|
|
|
|
#ifdef TEST
|
|
#include "integer.h"
|
|
#undef printf
|
|
int main(void){
|
|
int64_t a,b,c,d,e;
|
|
|
|
for(a=7; a<(1LL<<62); a+=a/3+1){
|
|
for(b=3; b<(1LL<<62); b+=b/4+1){
|
|
for(c=9; c<(1LL<<62); c+=(c*2)/5+3){
|
|
int64_t r= c/2;
|
|
AVInteger ai;
|
|
ai= av_mul_i(av_int2i(a), av_int2i(b));
|
|
ai= av_add_i(ai, av_int2i(r));
|
|
|
|
d= av_i2int(av_div_i(ai, av_int2i(c)));
|
|
|
|
e= av_rescale(a,b,c);
|
|
|
|
if((double)a * (double)b / (double)c > (1LL<<63))
|
|
continue;
|
|
|
|
if(d!=e) printf("%"PRId64"*%"PRId64"/%"PRId64"= %"PRId64"=%"PRId64"\n", a, b, c, d, e);
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|