/* Filename: Mantel.sas Purpose: SAS code to compute two distance matrices and compare them. The assemblage distance matrix is regressed on the geographical distance matrix. We compute a standardized regression coefficient. Hypothesis testing is done via randomization. Finally the STANDARDIZED distances used in the regrssion are output and plotted. Last Update: FDN 4.8.02 */ title1 'Mantel Test'; /* first prepare the datasets to go into IML -- they MUST be sorted on Group */ proc sort data=braun1; by tclass group; proc sort data=Coords; by group ; proc iml; /* first we define the functions that well need*/ ************************************************************************** Function: EUCDIST Description:Compute Euclidean distance between two row vectors. Arguments: X1,X2: the two row vectors. pct: set to 1 if you want X1 and X2 coverted to relative frequencies first. Returns: Euclidean distance between X1 and X2. ; start eucdist(x1,x2,pct); if pct=1 then do; x1=x1/sum(x1); x2=x2/sum(x2); end; dist= sqrt((x1-x2)*(x1-x2)`); return(dist); finish eucdist; ****************************************************************************; ************************************************************************** Function: AssemDISTMAT Description: Compute a Euclidean distance matrix for the rows of an input dataset. Converts the rows to relative freqs first. Arguments: XMAT: the input data set ROWNAME: a character variable that names the rows Returns: Dmat: the distance matrix FromMat: A matrix of row labels for the distances ToMat Another matrix of row labels; START assemdistmat(xmat,rowname,DMAT,FROMMAT,TOMAT); n=nrow(xmat); dmat=j(n,n,0); FromMat= cshape(' ',n,n,8); ToMat=FromMat; do i=1 to n by 1; do j=1 to n by 1; dmat[i,j]= eucdist(xmat[i,],xmat[j,],1); FromMat[i,j]=rowname[i,]; ToMat[i,j]=rowname[j,]; end; end; FINISH assemdistmat ; ******************************************************************************************************; ****************************************************************************************************** Function: GeoDISTMAT Description: Compute a Euclidean distance matrix for the rows of an input dataset. Arguments: XMAT: the input data set ROWNAME: a character variable that names the rows Returns: Dmat: the distance matrix FromMat: A matrix of row labels for the distances ToMat Another matrix of row labels; START geodistmat(xmat,rowname,DMAT,FROMMAT,TOMAT); n=nrow(xmat); dmat=j(n,n,0); FromMat= cshape(' ',n,n,8); ToMat=FromMat; do i=1 to n by 1; do j=1 to n by 1; dmat[i,j]= eucdist(xmat[i,],xmat[j,],0);*note the 0 argument -- no percentages please!; FromMat[i,j]=rowname[i,]; ToMat[i,j]=rowname[j,]; end; end; FINISH geodistmat ; ****************************************************************************************************** Function: MATTOCOL Description:Puts the lower triangle of a symmetric matrix in a col vector Arguments: Dmat -- the symmetric matrix Returns: A column vector; START MATTOCOL (dmat); n=nrow(dmat); distvec= dmat[{1},{2}:n]`; do i={2} to (n-{1}); distvec=distvec//( dmat[i,(i+{1}):n])`; end; return (distvec); FINISH MATTOCOL; ******************************************************************************************************; ******************************************************************************************************* Function: PERMUTE Description:Randomly permutes the order of the rows and cols of a symmteric matrix. Arguments: Dmat: the symmetric matrix. Returns: the randomly permuted matrix.; START PERMUTE (dmat); n=nrow(dmat); ran= uniform(repeat(0,n,1)); order = rank(ran); newmat = dmat[order,order]; return (newmat); FINISH; *****************************************************************************************************; ****************************************************************************************************** Function: REG Description:Computes a standardized regression coefficient for two distance matrices. Arguments: X: independent variable matrix Y: dependent variable matrix Returns: a standardized Beta; START reg (y, x); y1=standard(mattocol(y)); x1=standard(mattocol(x)); n=nrow(x1); xmat= j(nrow(x1),1,1) || x1; xpx= xmat`*xmat; xpy = xmat`*y1; xpxi = inv(xpx); b= xpxi*xpy; return (b`[,2]); FINISH reg; *****************************************************************************************************; **************************************************************************** Function: GETPVALUE Description:finds the depth of an observed test statistic in a sampling distrbution. Arguments: HOSampDist: The sampling distribution of the test stat, under H0. teststat: The observed value of the test statistic. Returns: The p value associated with a one-tailed test of the statistic in question; start getpvalue(HOSampDist,teststat); nsamples=nrow(HOSampdist); rindex=rank(HOSampDist); sorted=j(nsamples,1,0); sorted[rindex,] = HOSampDist[1:Nsamples,]; fvalues=((1:nsamples)-.5)/Nsamples; do i=1 to nsamples; if i=1 then do ; if teststat <= sorted then pvalue=1; end; if (1 < i) & (i < nsamples) then do; if (sorted[i,] < teststat) & ( teststat <= sorted[i+1,]) then pvalue=1-fvalues[,i]; end; if i=nsamples then do; if sorted[i,] <= teststat then pvalue=0; end; end; return(pvalue); finish getpvalue; ********************************************************************; /* OK lets use the functions that we have defined */ use braun1; read all into xmat [rowname=group colname=motifs] where (tclass='mm02_4'); *read the assemblage data into IML -- note the where clause to pick the right tclass; use Coords; *read the spatial coordinate data; read all into coords [rowname=group colname=dims]; call assemdistmat(xmat,group,ASSEMDIST,aFROMMAT,aTOMAT); *compute the assemblage distances; call geodistmat(coords,group,GEODIST,cFROMMAT,cTOMAT); *compute the geodistances; beta= reg(assemdist,geodist); *do the regression; print 'The observed betas (standardized)' beta; *print the results; /* now compute the randomization distribution of the beta estimates */ iter=1000; ranbetas = j(iter,1,0); do i=1 to iter; ranassemdist= permute(assemdist); ranbetas[i,] = reg(ranassemdist,geodist); end; pvalue=getpvalue(ranbetas,beta); *find the pvalue; print 'Probability of a greater value (one tailed)' pvalue; /*output the distances */ y1=standard(mattocol(assemdist)); x1=standard(mattocol(geodist)); distances =y1||x1; fromvec=mattocol(afrommat) ; tovec=mattocol(atomat); fromtovec= concat(fromvec,tovec); label= {'assemdist' 'geodist'}; create distances from distances [rowname=fromtovec colname=label]; append from distances [rowname=fromtovec colname=label]; close distances; /* output the randomization ditribution */ create ranbetas from ranbetas; append from ranbetas; close ranbetas; title2 'standardized distances (u=0, s=1)'; proc print data=distances; proc plot data=distances; plot assemdist*geodist = '*' $ fromtovec; run; proc univariate normal plot data=ranbetas; run;