/*Filename: LISA.sas */ /*Purpose: Computes Gestis-Ord G, Local Moran I, Local Semivariance, Gearys C, */ /* and type of neighborhood-location relationshipnfor a spatially distributed */ /* variable. Signifiance levels estimated via randomization. */ /* Plots statistic values, types, and significance levels in space */ /*Last Update: FDN 2.26.07 */ option ps=40 ls=100; /*invoke Proc IML -- Interactive Matrix Language */ proc iml; use maya; ******** supply dataset name; read all var{sn we} into c; ******** names of northing (y) and easting (x) vars; read all var{date_ad_} into x; ******** name of z variable; read all var{site} into label; ******** name of an id variable for each obs ; laglow = 60 ; ******* enter lower bound of spatial lag (>) ; laghigh = 80 ; ******** enter upper bound of spatial lag (<=); ntrials =1000 ; ******** enter number of randomizations; /* define the necssary functions */ ******************************************************************************************** Function: LocalAC Description:compute Local G, I, C, and semivariance for an observation and its neighborhood Arguments: i, wtvec,z,variance Returns: G, SEMIVAR,GC,MORAN,MEANDIST,NEDGES, G_TYPE, M_TYPE) ********************************************************************************************; start localAC(i, wtvec,x, variance, G, SEMIVAR, GC,MORAN,NEDGES, G_type, M_TYPE); n=nrow(x); z= x-(sum(x)/n); *deviations from mean; sumw= sum(wtvec); wtvecs =wtvec/sumw; index = loc( i ^= (1:n)); *getis-ord g; g_mean = sum(x[index,])/(n-1); g_numrtr = wtvec*x - sum(wtvec)#g_mean; g_std = ( sum( (x[index,]-g_mean) ##2) / (n-1) )##.5; g_denmntr = (( g_std # ((n - 1)# sum(wtvec##2)) - sum(wtvec)##2)/(n-2))##.5; g = g_numrtr/g_denmntr; semivar= sum(wtvecs*(x[i,]- x)##2) #.5 ;*semivariance; gc = semivar /variance; *gearys c; wtz = wtvecs*z; moran = (sum (z[i,]#wtz))/ variance; *morans I; nedges = sumw; if (z[i] > wtz) then G_type ='HL'; if (z[i] < wtz) then G_type ='LH'; if (z[i] > 0) & (wtz > 0) then M_type ='HH'; if (z[i] > 0) & (wtz < 0) then M_type ='HL'; if (z[i] < 0) & (wtz > 0) then M_type ='LH'; if (z[i] < 0) & (wtz < 0) then M_type ='LL'; finish localAC; ******************************************************************************************** Function: PERMUTEvec Description:Randomly permutes the cols of a row vector Arguments: wtvec: a 0/1 vector of weights. Returns: a randomly pemuted version of wtvec. ********************************************************************************************; START PERMUTEVEC (wtvec); n=ncol(wtvec); seed = 0; c = j(n,1,seed); b = uniform(c); order = rank(b); newvec = wtvec[,order]; return (newvec); FINISH; **************************************************************************** Function: GETPVALUE Description:finds the depth of an observed test statistic in a sampling distrbution. Arguments: HOSampDist: The sampling distribution of the test stat, under H0. Ntrials: The number of trials in the randomization. teststat: The observed value of the test statistic. Returns: The p value associated with a one-tailed test of the statistic in question; start getpvalue(HOSampDist,ntrials,teststat); pvalue=.; nsamples=nrow(HOSampdist); rindex=rank(HOSampDist); sorted=j(nsamples,1,0); sorted[rindex,] = HOSampDist[1:Nsamples,]; fvalues=((1:nsamples)-.5)/Nsamples; middle = (nsamples+1)/2; do i = middle to 1 by -1; if (i > 1) then do; if (teststat < sorted[i,] ) & ( teststat >= sorted[i-1,]) then pvalue= fvalues[,i-1]; end; if i=1 then do ; if teststat < sorted[1,] then pvalue= 1/(ntrials+1); end; end; do i = middle to nsamples by 1; if (i < nsamples) then do; if (teststat > sorted[i, ]) & ( teststat <= sorted[i+1,]) then pvalue=1-fvalues[,i+1]; end; if i=nsamples then do ; if teststat > sorted[nsamples,] then pvalue=1/(ntrials+1); end; end; if teststat = sorted[middle,] then pvalue=.5; return (pvalue); finish getpvalue; ********************************************************************; /*do the business */ *dimension vars; n=nrow(x); Moran_type = j(n,1,'XX'); GC_Type = Moran_type; distmat= j(n,n,0); Gearys_c = j(n,1,.); go_g=Gearys_c; semivariance=Gearys_c; morans_i=Gearys_c; gc_pvalue=Gearys_c; moran_pvalue=Gearys_c; go_g_pvalue=Gearys_c; nedgesvec =Gearys_c; meandist=Gearys_c; z = x - ((sum(x))/n); *deviations from mean; ss=ssq(z); s2= ss/(n-1); *The variance; *build the distance matrix; mean = (sum(x)/n); do j=1 to n by 1; *get a matrix of distances between data points and their weights; do k=1 to n by 1; *and matrices of squared differences and crossproducts; distmat[k,j]= ((c[k,]-c[j,])*(c[k,]-c[j,])`)##.5; end; end; wtmat = (laglow < distmat) & (distmat <= laghigh); if mod(ntrials,2)=0 then ntrials=ntrials+1; *make sure ntrials is odd; sumwtmat= sum(wtmat); *print distmat wtmat sumwtmat; *dimension vectors for the randomized stats; r_Gvec = j(ntrials,1,.); r_GCvec = r_Gvec ; r_MORANvec = r_Gvec ; do i=1 to n; *get the stats for each data point; wtvec= wtmat[i,]; if sum(wtvec) > 0 then do; *minimum number of points in the neighborhood; call localAC(i, wtvec, x, S2 , g, SEMIVAR, GC, MORAN, NEDGES, G_type, M_TYPE); go_g[i,]= g; SEMIVARiance[i,] = semivar; Gearys_c[i,] = gc; MORANs_i[i,]= moran; NEDGESvec[i,]= nedges; Moran_type[i,]= M_type; GC_type[i,] =G_type; do j=1 to ntrials; * do the randomizations; r_wtvec=permutevec(wtvec); CALL localAC(i, r_wtvec, x, S2 ,r_g, x1, r_GC, r_MORAN, x2, x3, x4); r_Gvec[j,] = r_g; r_GCvec[j,] = r_GC; *build the randomization distribution; r_MORANvec[j,]= r_MORAN; end; go_g_pvalue[i,]= getpvalue(r_Gvec,ntrials,G); gc_pvalue[i,]= getpvalue(r_GCvec,ntrials,GC); moran_pvalue[i,]= getpvalue(r_MORANvec,ntrials,MORAN); end; end; y_coord=c[,1]; x_coord=c[,2]; create lisa var{label y_coord x_coord nedgesvec go_g go_g_pvalue semivariance gearys_c gc_pvalue gc_type morans_i moran_pvalue moran_type}; append; proc print data=lisa; run; /* Plot the getis-ord g stats. positive values in orange-red, negative ones in blue-purple*/ /* Significance levels shown by color hues. */ /* NOTE: Change the pvalues as you think appropriate for the number of observations */ data mayasitenames; * create a SAS "annotate" file that will allow us to plot the site names; set maya; length text $ 16; text=site; y=sn; x=we; function = 'LABEL '; COLOR='BLACK '; position='6'; style='SWISS '; size=1; XSYS='2'; YSYS='2'; OUTPUT; data GetisOrdMap; *create a SAS annotate file that has the colored symbols for the stat values;; set lisa; length text $ 16; length color $ 16; y=Y_coord; x=x_coord; FUNCTION='SYMBOL'; size= abs(go_g)*.5; if go_g <= 0 then do; if go_g_pvalue >.05 then do; text='circle'; color='lightblue'; end; if go_g_pvalue <=.05 then do; text='dot' ; color='lightblue'; end; if go_g_pvalue <=.001 then do; text='dot' ; color='blue'; end; if go_g_pvalue <=.0001 then do; text='dot' ; color='purple'; end; end; if go_g > 0 then do; if go_g_pvalue >.05 then do; text='circle'; color='orange'; end; if go_g_pvalue <=.05 then do; text='dot' ; color='orange'; end; if go_g_pvalue <=.001 then do; text='dot' ; color='red'; end; if go_g_pvalue <=.0001 then do; text='dot' ; color='darkred'; end; end; if go_g_pvalue = . then text=' '; XSYS='2'; YSYS='2'; OUTPUT; data GetisOrdMap; set GetisOrdMap mayasitenames; *combine the maya site names and the stats; title1 'Getis-Ord G'; proc ganno annotate=getisordmap datasys; run; /* Plot the geary's c stats. HL values in red, LH values in blue. */ /* Significance levels inversely proportional to symbol size. */ data GearysCMap; set lisa; length text $ 16; length color $ 16; y=Y_coord; x=x_coord; FUNCTION='SYMBOL'; size= log(1/gc_pvalue); if gc_type = 'HL' then do; text='circle'; color= 'red'; end; if gc_type = 'LH' then do; text='circle'; color= 'blue'; end; if gc_type = 'HL' then do; if gc_pvalue <= .05 then do; text='dot'; color= 'red'; end; end; if gc_type = 'LH' then do; if gc_pvalue <= .05 then do; text='dot'; color= 'blue'; end; end; XSYS='2'; YSYS='2'; OUTPUT; data GearysCMap; set GearysCMap mayasitenames; *combine the maya site names and the stats; title 'Local Morans I Map'; proc ganno annotate=gearysCMap datasys; run; /* Plot the Morans I stats. HL values in light blue, LH values in blue, */ /* LL values in pink, HH values in red */ /* Significance levels inversely proportional to symbol size. Shading by P-value. */ data MoransIMap; set lisa; length text $ 16; length color $ 16; y=Y_coord; x=x_coord; FUNCTION='SYMBOL'; size= log(1/moran_pvalue); *the default; if morans_i < 0 and moran_pvalue > .05 then do; text='circle'; color='red'; end; if morans_i > 0 and moran_pvalue > .05 then do; text='circle'; color='blue'; end; if moran_type = 'HL' and moran_pvalue <= .05 then do; text='dot'; color='lightblue'; end; if moran_type = 'LH' and moran_pvalue <= .05 then do; text='dot'; color='blue'; end; if moran_type = 'LL' and moran_pvalue <= .05 then do; text='dot'; color='pink'; end; if moran_type = 'HH' and moran_pvalue <= .05 then do; text='dot'; color='red'; end; XSYS='2'; YSYS='2'; OUTPUT; data MoransIMap; set MoransIMap mayasitenames; *combine the maya site names and the stats; title1 'Local Moran''s I'; proc ganno annotate=MoransIMap datasys; run;