###################################################################### ###################################################################### ###################################################################### ###### ###### ###### PRINCIPAL COMPONENTS ANALYSIS / ###### ###### MULTIDIMENSIONAL SCALING ###### ###### ###### ###### Author: Abraham Rashin ###### ###### Date: May 10, 2006 ###### ###### Language: Maple 10 ###### ###### Dependencies: LinearAlgebra, RandomTools ###### ###### References: See below. ###### ###### ###### ###### ###### ###################################################################### ###################################################################### ###################################################################### with(LinearAlgebra): Help:=proc() print(`dist2covar(D), pca(C,p)`): print(`rfloat(), rvec(d), rvec2(d,P), rorth(d), rpts(d,P,n)`): print(`pts2dist(L), pts2covar(L)`): end: ############################### #### PCA FUNCTIONS #### ############################### # Generates best covariance Matrix C given distance Matrix D. dist2covar:=proc(D) local i,j,n,u,A,b,C: n:=ColumnDimension(D): b:=Vector((n*n+n)/2): for i from 1 to n do for j from i+1 to n do b[ij2k(n,i,j)]:=D[i,j]^2: od: od: A:=matr1(n): b:=LeastSquares(A,b): C:=Matrix(n): for i from 1 to n do for j from i to n do u:=b[ij2k(n,i,j)]: C[i,j]:=u: C[j,i]:=u: od: od: C: end: # Generates list of n p-Vectors from covariance Matrix C via PCA. pca:=proc(C,p) local n,u,v,L: u:=SingularValues(C,output=['U','S']): n:=ColumnDimension(u[1]): u:=convert(u[1][1..n,1..p].Matrix(u[2][1..p],shape=diagonal),listlist): L:=NULL: for v in u do L:=L,Vector(v): od: [L]: end: #### HELPER FUNCTIONS #### # Helper function: Performs variable indexing for dist2covar and matr1. ij2k:=proc(n,i,j) local u: u:=n+i-j: ((u*u-u)/2)+i: end: # Helper function: Generates Matrix used in dist2covar. matr1:=proc(n) local i,j,u,tii,tjj,t,A: u:=(n*n+n)/2: A:=Matrix(u): for i from 1 to n do for j from i+1 to n do tii:=ij2k(n,i,i): tjj:=ij2k(n,j,j): t:=ij2k(n,i,j): A[t,tii]:=1: A[t,tjj]:=1: A[t,t]:=-2: A[tii,t]:=1: A[tjj,t]:=1: od: od: for i from 1 to n do A[ij2k(n,i,i),ij2k(n,i,i)]:=1: od: A: end: ############################### #### TEST FUNCTIONS #### ############################### #### RANDOM FUNCTIONS #### # Note that these random functions fail sometimes, but with very low probability. # Generates a random floating point number in [0,1]. rfloat:=proc() RandomTools[Generate](float(range=0..1,method=uniform)): end: # Generates a random Vector in [0,1]^d. rvec:=proc(d) Vector(d,rfloat): end: # Generates a random Vector in R^d # with range in component k given by -P[k]..P[k] for k in Z, 1<=k<=d. rvec2:=proc(d,P) local i,u: u:=NULL: for i from 1 to d do u:=u,-P[i]+2*P[i]*rfloat(): od: Vector([u]): end: # Generates a random orthogonal Matrix in O_d(R). Warning: Roundoff error. rorth:=proc(d) local i,L: L:=NULL: for i from 1 to d do L:=L,rvec(d): od: L:=GramSchmidt([L],normalized): Matrix(L): end: # Generates a sample of points in R^d that is suitable for PCA. rpts:=proc(d,P,n) local i,L,Q: Q:=rorth(d): L:=NULL: for i from 1 to n do L:=L,Q.rvec2(d,P): od: [L]: end: #### OTHER TEST FUNCTIONS #### # Generates distance Matrix D from list L of Vectors. pts2dist:=proc(L) local i,j,n,D,v,d: n:=nops(L): D:=Matrix(n): for i from 1 to n do for j from 1 to i-1 do v:=L[i]-L[j]: d:=sqrt(v.v): D[i,j]:=d: D[j,i]:=d: od: od: D: end: # Generates covariance Matrix C (c_i_j=L[i].L[j]) from list L of Vectors. pts2covar:=proc(L) local i,j,v,u,n,d,C,c: n:=nops(L): d:=Dimension(L[1]): v:=Vector(d): for u in L do v:=v+u: od: v:=(1/n).v: C:=Matrix(n): for i from 1 to n do for j from 1 to i do c:=(L[i]-v).(L[j]-v): C[i,j]:=c: C[j,i]:=c: od: od: C: end: # References: # Sources about classical PCA -- used here: # 1. http://www.psc.edu/biomed/training/workshops/2000/NMR/dg_intro.pdf # 2. http://en.wikipedia.org/wiki/Principal_components_analysis # Sources about PCA on manifolds (Isomap) -- just for fun, or for real statistics: # 3. JB Tenenbaum, V de Silva, JC Langford. Science. 290, 2319 (2000). # 4. http://web.mit.edu/cocosci/Papers/Balasubramanian-etal-2003.pdf