/*
 * Principal component analysis
 * Copyright (c) 2004 Michael Niedermayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/**
 * @file pca.c
 * Principal component analysis
 */

#include "common.h"
#include "pca.h"

/** Upper bound on the number of Jacobi sweeps attempted by ff_pca(). */
#define PCA_MAX_PASSES 50

/**
 * Allocates a PCA context for n-dimensional samples.
 *
 * @param n number of dimensions; must be > 0
 * @return a zero-initialised context, or NULL if n <= 0 or an allocation
 *         failed. Release it with ff_pca_free().
 */
PCA *ff_pca_init(int n){
    PCA *pca;

    if(n<=0)
        return NULL;

    pca= av_mallocz(sizeof(PCA));
    if(!pca)
        return NULL;

    pca->n= n;
    pca->count= 0;
    pca->covariance= av_mallocz(sizeof(double)*n*n);
    pca->mean= av_mallocz(sizeof(double)*n);

    /* Do not hand out a half-built context: undo whatever was allocated. */
    if(!pca->covariance || !pca->mean){
        av_freep(&pca->covariance);
        av_freep(&pca->mean);
        av_free(pca);
        return NULL;
    }

    return pca;
}

/**
 * Frees a context created by ff_pca_init().
 *
 * @param pca context to release; NULL is accepted and ignored
 */
void ff_pca_free(PCA *pca){
    if(!pca)
        return;

    av_freep(&pca->covariance);
    av_freep(&pca->mean);
    av_free(pca);
}

/**
 * Accumulates one sample into the running sums.
 *
 * Adds v[i] to mean[i] and v[i]*v[j] to covariance[j + i*n] for j >= i;
 * only that half of the matrix is maintained here. The sums are turned
 * into an actual mean / covariance by ff_pca().
 *
 * @param pca context
 * @param v   sample; the first pca->n elements are read
 */
void ff_pca_add(PCA *pca, double *v){
    int i, j;
    const int n= pca->n;

    for(i=0; i<n; i++){
        pca->mean[i] += v[i];
        for(j=i; j<n; j++)
            pca->covariance[j + i*n] += v[i]*v[j];
    }
    pca->count++;
}

/**
 * Applies one plane rotation to the element pair a[j + i*n], a[l + k*n].
 *
 * @param s   sine of the rotation angle
 * @param tau s / (1 + cosine), as computed by the caller
 */
static void pca_rotate(double *a, int n, int i, int j, int k, int l,
                       double s, double tau){
    const double g= a[j + i*n];
    const double h= a[l + k*n];

    a[j + i*n]= g - s*(h + g*tau);
    a[l + k*n]= h + s*(g - h*tau);
}

/**
 * Computes eigenvalues and eigenvectors of the covariance of the samples
 * added so far, using cyclic Jacobi rotations.
 *
 * The accumulated sums in pca->mean and pca->covariance are normalised and
 * then diagonalised IN PLACE, so the context must not be fed further
 * samples or solved a second time afterwards.
 *
 * @param pca         context filled through ff_pca_add()
 * @param eigenvector output, n*n doubles; component j of eigenvector i is
 *                    written to eigenvector[i + j*n]
 * @param eigenvalue  output, n doubles, sorted in descending order on success
 * @return the number of sweeps that were needed, or -1 if no sample was
 *         added or the iteration did not converge within PCA_MAX_PASSES
 */
int ff_pca(PCA *pca, double *eigenvector, double *eigenvalue){
    int i, j, k, pass;
    const int n= pca->n;
    double z[n];

    /* Without samples the normalisation below would divide by zero. */
    if(pca->count <= 0)
        return -1;

    memset(eigenvector, 0, sizeof(double)*n*n);

    /* Turn the running sums into mean and covariance, mirror the maintained
     * half of the matrix into the other half, and start from the identity
     * as eigenvector matrix. */
    for(j=0; j<n; j++){
        pca->mean[j] /= pca->count;
        eigenvector[j + j*n] = 1.0;
        for(i=0; i<=j; i++){
            pca->covariance[j + i*n] /= pca->count;
            pca->covariance[j + i*n] -= pca->mean[i] * pca->mean[j];
            pca->covariance[i + j*n] = pca->covariance[j + i*n];
        }
        eigenvalue[j]= pca->covariance[j + j*n];
        z[j]= 0;
    }

    for(pass=0; pass < PCA_MAX_PASSES; pass++){
        double sum=0;

        /* Magnitude of what is left off the diagonal. */
        for(i=0; i<n; i++)
            for(j=i+1; j<n; j++)
                sum += fabs(pca->covariance[j + i*n]);

        if(sum == 0){
            /* Converged: selection-sort the eigenvalues in descending order
             * and move the matching eigenvectors along with them. */
            for(i=0; i<n; i++){
                /* Seed the search with element i itself so that k is always
                 * a valid index, even for values that are NaN or <= -1. */
                double maxvalue= eigenvalue[i];
                k= i;
                for(j=i+1; j<n; j++){
                    if(eigenvalue[j] > maxvalue){
                        maxvalue= eigenvalue[j];
                        k= j;
                    }
                }
                eigenvalue[k]= eigenvalue[i];
                eigenvalue[i]= maxvalue;
                for(j=0; j<n; j++){
                    double tmp= eigenvector[k + j*n];
                    eigenvector[k + j*n]= eigenvector[i + j*n];
                    eigenvector[i + j*n]= tmp;
                }
            }
            return pass;
        }

        for(i=0; i<n; i++){
            for(j=i+1; j<n; j++){
                double covar= pca->covariance[j + i*n];
                double t,c,s,tau,theta, h;

                if(pass < 3 && fabs(covar) < sum / (5*n*n)) //FIXME why pass < 3
                    continue;
                if(fabs(covar) == 0.0) //FIXME shouldnt be needed
                    continue;
                /* After the first sweeps, an element that is negligible
                 * relative to both diagonal entries is simply zeroed. */
                if(pass >=3 && fabs((eigenvalue[j]+z[j])/covar) > (1LL<<32)
                            && fabs((eigenvalue[i]+z[i])/covar) > (1LL<<32)){
                    pca->covariance[j + i*n]=0.0;
                    continue;
                }

                h= (eigenvalue[j]+z[j]) - (eigenvalue[i]+z[i]);
                theta=0.5*h/covar;
                t=1.0/(fabs(theta)+sqrt(1.0+theta*theta));
                if(theta < 0.0) t = -t;

                c=1.0/sqrt(1+t*t);
                s=t*c;
                tau=s/(1.0+c);
                /* Diagonal corrections are collected in z[] and folded into
                 * eigenvalue[] once per sweep. */
                z[i] -= t*covar;
                z[j] += t*covar;

                for(k=0; k<n; k++) {
                    if(k!=i && k!=j){
                        /* FFMIN/FFMAX keep every access inside the half of
                         * the matrix addressed as [larger + smaller*n]. */
                        pca_rotate(pca->covariance, n,
                                   FFMIN(k,i), FFMAX(k,i),
                                   FFMIN(k,j), FFMAX(k,j), s, tau);
                    }
                    pca_rotate(eigenvector, n, k, i, k, j, s, tau);
                }
                pca->covariance[j + i*n]=0.0;
            }
        }
        for (i=0; i<n; i++) {
            eigenvalue[i] += z[i];
            z[i]=0.0;
        }
    }

    return -1;
}

/* NOTE(review): the guard, the #undef lines and the system header names
 * below were not legible in the reviewed text and have been reconstructed;
 * confirm them against the project's build conventions. */
#ifdef TEST

#undef printf
#undef random
#include <stdio.h>
#include <stdlib.h>

#define LEN 8

/**
 * Self-test: feeds random step-shaped vectors into the PCA, then prints the
 * diagonalised matrix, a per-eigenvector error figure and the eigenvectors
 * with their eigenvalues.
 */
int main(void){
    PCA *pca;
    int i, j, k;
    double eigenvector[LEN*LEN];
    double eigenvalue[LEN];

    pca= ff_pca_init(LEN);
    if(!pca)
        return 1;

    for(i=0; i<9000000; i++){
        double v[LEN];
        int pos= random()%LEN;
        int v2= (random()%101) - 50;
        v[0]= (random()%101) - 50;
        /* Elements up to pos repeat v[0], the remaining ones take v2. */
        for(j=1; j<LEN; j++){
            if(j<=pos) v[j]= v[0];
            else       v[j]= v2;
        }
        ff_pca_add(pca, v);
    }

    ff_pca(pca, eigenvector, eigenvalue);
    for(i=0; i<LEN; i++){
        pca->count= 1;
        pca->mean[i]= 0;

//        (0.5^|x|)^2 = 0.5^2|x| = 0.25^|x|
//        pca.covariance[i + i*LEN]= pow(0.5, fabs
        for(j=i; j<LEN; j++){
            printf("%f ", pca->covariance[i + j*LEN]);
        }
        printf("\n");
    }

#if 1
    for(i=0; i<LEN; i++){
        double v[LEN];
        double error=0;
        memset(v, 0, sizeof(v));
        for(j=0; j<LEN; j++){
            for(k=0; k<LEN; k++){
                v[j] += pca->covariance[FFMIN(k,j) + FFMAX(k,j)*LEN] * eigenvector[i + k*LEN];
            }
            v[j] /= eigenvalue[i];
            error += fabs(v[j] - eigenvector[i + j*LEN]);
        }
        printf("%f ", error);
    }
    printf("\n");
#endif

    /* NOTE(review): the reviewed text was cut off at the start of this loop;
     * the eigenvector/eigenvalue dump below is a reconstruction. */
    for(i=0; i<LEN; i++){
        for(j=0; j<LEN; j++){
            printf("%9.6f ", eigenvector[i + j*LEN]);
        }
        printf("  %9.1f %f\n", eigenvalue[i], eigenvalue[i]/eigenvalue[0]);
    }

    ff_pca_free(pca);
    return 0;
}
#endif