/* Filename: CAExample.sas */
/* Purpose: IML code that demonstrates CA (correspondence analysis) */
/* Last Update: FDN 4.30.02 */

proc iml;

/* Create the data matrix: a 4 x 3 table of counts. */
x = {  5 10 20,
      10 40  5,
      40  5  0,
      50  1  0 };
print x;

/* Matrix of proportions: every cell divided by the grand total. */
p = x / sum(x);
print p;

/* Margins of p: colsums is a 1 x 3 row vector, rowsums is a 4 x 1 column vector. */
colsums = p[+,];
rowsums = p[,+];
print colsums rowsums;

/* Expected proportions under independence (outer product of the margins),
   as in a contingency-table chi-square. */
expected = rowsums * colsums;

/* Deviations from the expected proportions, each divided elementwise by the
   square root of its expected proportion.  This should look familiar: the
   entries are the signed square roots of the chi-square cell contributions,
   scaled by the grand total. */
q = (p - expected) / (expected##0.5);
print q;

/* Cross-product matrix of q (3 x 3).  It plays the role that the covariance
   matrix plays in PCA.  The sum of its diagonal elements is the total amount
   of variation, also known as the inertia. */
cov = q` * q;
print cov;

/* Eigenvalues (l) and eigenvectors (columns of u) of cov.
   Each eigenvalue is the inertia accounted for by the corresponding axis. */
call eigen(l, u, cov);
print 'eigenvalues' l 'eigenvectors' u;

/* Note that one eigenvalue is (numerically) 0 -- why?
   Hint: q * sqrt(colsums)` is a vector of zeros, so cov is singular.
   The eigenvector paired with that eigenvalue is not a real CA axis. */

/* Now compute the coordinates of the rows and columns in their joint space. */

/* Columns first: a diagonal matrix holding the inverse square roots of the
   column sums. */
dcols = diag(colsums##(-0.5));
print dcols u;

/* Rescale each eigenvector element by the inverse square root of its column
   sum.  This gives less influence to columns with big sums.
   v has one column per eigenvector, so it also carries the column that
   belongs to the zero eigenvalue. */
v = dcols * u;
print 'the coordinates of the columns in the 2-d CA space' v;

/* Now the row scores: a diagonal matrix holding the inverse row sums. */
drows = diag(rowsums##(-1));

/* Row profiles: each row of p divided by its row sum, i.e. conditional
   proportions (row percents expressed as fractions). */
rowprofiles = drows * p;
print drows p rowprofiles;

/* Row scores: each row is placed at the profile-weighted average of the
   column coordinates. */
f = rowprofiles * v;
print 'the row coordinates as weighted avarages of the colunm coordinates' f;

/* PROC IML is interactive: end the session explicitly. */
quit;