/*Filename: autocorrRAN.sas */ /*Purpose: Computes Moran's I, Geary's C, and semivariance for specified */ /* spatial lags. For each lag, generates a randomization of data values,*/ /* computes Moran's I and Geary's C for each, then estimates p-values */ /* and 95% confidence limits from the randomization distribution */ /* plots them in a correlogram. The paramater NTRIALS sets the number */ /* of randomizations */ /*Last Update: FDN 2.15.07 */ /*invoke Proc IML -- Interactive Matrix Language */ proc iml; use compton ; ******** supply dataset name; read all var{northing easting} into c; ******** names of northing (y) and easting (x) vars; read all var{whitepipes} into x; ******** name of z variable; nsteps= 10 ; ******** enter number of lags ; lagsize= 10 ; ******** enter lag size; ntrials =1000 ; ******** enter number of randomizations; /* define the necssary functions */ ******************************************************************************************** Function: CORRGRAM Description:compute correlograms for morans I and gearys C. Arguments: nsteps, x, logsize, distmat, diffmat, cpmat Returns: SEMIVAR,GC,MORAN,LAGD,MEANDIST,NEDGES ********************************************************************************************; start corrgram(nsteps,x,lagsize,distmat,diffmat,cpmat,SEMIVAR,GC,MORAN,LAGD,MEANDIST,NEDGES); gc=j(nsteps,1,0); lagd=gc; semivar=gc; nedges=gc; moran=gc; meandist=gc; n=nrow(x); z = x - ((sum(x))/n); *deviations from mean; ss=ssq(z); variance= ss/(n-1); do i=1 to nsteps; *compute for the i'th lag; lagd[i,]= i#lagsize; lh= (lagd[i,]-(lagsize/2)); hh= (lagd[i,]+(lagsize/2)); w= (distmat > lh) # (distmat <= hh); * binary weights matrix; sumw= sum(w); meandist[i,]= (sum(w#distmat))/sumw; *average diatnce between point pairs; semivar[i,]= ((sum(w#diffmat))/sumw)*.5; *semivariance; gc[i,]= semivar[i,]/variance; *gearys c; moran[i,] = ((sum(w#cpmat))/sumw)/ (ss/n); nedges[i,]= sumw/2; end; finish corrgram; ******************************************************************************************** Function: PERMUTE Description:Randomly permutes the order of the rows and cols of a symmteric matrix. Arguments: Dmat: the symmetric matrix. Returns: the randomly permuted matrix. ********************************************************************************************; START PERMUTE (dmat); n=nrow(dmat); ran= uniform(repeat(0,n,1)); order = rank(ran); newmat = dmat[order,order]; return (newmat); FINISH; **************************************************************************** Function: GETPVALUEANDCLs Description:finds the depth of an observed test statistic in a sampling distrbution. Arguments: HOSampDist: The sampling distribution of the test stat, under H0. teststat: The observed value of the test statistic. Returns: The p value associated with a one-tailed test of the statistic in question The upper and lower 95% CLs. ; start getpvalueandCLs(HOSampDist,teststat,PVALUE,UCL,LCL); nsamples=nrow(HOSampdist); rindex=rank(HOSampDist); sorted=j(nsamples,1,0); sorted[rindex,] = HOSampDist[1:Nsamples,]; LCL= sorted[int(((.025)#nsamples)+.5),]; UCL= sorted[int(((.975)#nsamples)+.5),]; fvalues=((1:nsamples)-.5)/Nsamples; do i=1 to nsamples; if i=1 then do ; if teststat <= sorted[i,] then pvalue=1; end; if (1 < i) & (i < nsamples) then do; if (sorted[i,] < teststat) & ( teststat <= sorted[i+1,]) then pvalue=1-fvalues[,i]; end; if i=nsamples then do; if sorted[i,] <= teststat then pvalue=0; end; end; finish getpvalueandCLs; ********************************************************************; /*do the business */ *dimension vars; n=nrow(x); distmat= j(n,n,0); diffmat=distmat; cpmat=distmat; gc_pvalue=j(nsteps,1,0); gc_ucl=gc_pvalue; gc_lcl=gc_pvalue; moran_pvalue=gc_pvalue; moran_ucl=gc_pvalue; moran_lcl=gc_pvalue; *build the distance matrices; mean = (sum(x)/n); do j=1 to n by 1; *get a matrix of distances between data points; do k=1 to n by 1; *and matrices of squared differences and crossproducts; distmat[k,j]= ((c[k,]-c[j,])*(c[k,]-c[j,])`)##.5; diffmat[k,j]= (x[k,]-x[j,])##2; cpmat[k,j] = (x[k,]-mean)#(x[j,]-mean); end; end; *first compute the stats for the data ; call corrgram(nsteps,x,lagsize,distmat,diffmat,cpmat,SEMIVAR,GC,MORAN,LAGD,MEANDIST,NEDGES); *dimension matrices for the randomized stats; r_GCmat = j(ntrials,nsteps,0); r_MORANmat = j(ntrials,nsteps,0); * randomly permute the rows and cols of the distance matrix and ; * compute correlograms for each of the permutations ; do i = 1 to ntrials; r_dist =permute(distmat); call corrgram(nsteps,x,lagsize,r_dist,diffmat,cpmat,R_SEMIVAR,R_GC,R_MORAN,R_LAGD,R_MEANDIST,R_NEDGES); r_GCmat[i,] = R_gc`; r_MORANmat[i,] = R_moran`; end; do i=1 to nsteps; call getpvalueandCLs(r_gcmat[,i],GC[i,],pvalue,ucl,lcl); gc_pvalue[i,]=pvalue; gc_ucl[i,]=ucl; gc_lcl[i,]=lcl; call getpvalueandCLs(r_MORANmat[,i],MORAN[i,],pvalue,ucl,lcl); moran_pvalue[i,]=pvalue; moran_ucl[i,]=ucl; moran_lcl[i,]=lcl; end; result= lagd|| meandist || nedges || semivar||gc || gc_pvalue ||gc_ucl ||gc_lcl || moran || moran_pvalue || moran_ucl || moran_lcl ; label= {lagd meandist nedges semivar gearys_c gc_pvalue gc_ucl gc_lcl morans_i moran_pvalue moran_ucl moran_lcl}; create autocorr from result [colname=label]; append from result ; proc print; run; proc print; goptions reset=global gunit=in ftitle="garamond" ftext="garamond" htitle=.3 htext=.2; symbol1 value=dot h=.2 color=grey i=join ; *specify the symbol to be used in plots; symbol2 value=none l=3 w=3 i=join color=blue; *specify the symbol to be used in plots; proc gplot data=autocorr; plot gearys_c * meandist=1 gc_ucl * meandist=2 gc_lcl * meandist=2 /overlay vref=1; plot morans_i* meandist =1 moran_ucl * meandist=2 moran_lcl * meandist=2 /overlay vref=0; run;