/*Filename: MayaLoess.sas */ /*Purpose: Fit a Loess surface to terminal monument dates, then check out */ /* the qq-plot, surface plot, and correlograms of residuals. */ /*Last Update: FDN 2.08.07 */ /*import the dataset */ PROC IMPORT OUT= WORK.maya DATAFILE= "H:\www\anth589b\mayadata.dbf" DBMS=DBF REPLACE; GETDELETED=NO; RUN; /*start by setting the graphics options --Note that names of windows fonts are given in quotes*/ goptions reset=global gunit=in ftitle="garamond" ftext="garamond" htitle=.3 htext=.2; symbol1 value=dot h=.2 i=join ; *specify the symbol to be used in plots; /* get a grid of points at which to predict the date values */ data PredPoints; do sn = 0 to 500 by 5; do we = 0 to 500 by 5; output; end; end; proc loess data =maya; ods Output OutputStatistics=StatOut ScoreResults=LoessSurface ; model date_ad_ = sn we / degree=2 select=gcv ; score data=predpoints; run; /*plot the loess surface*/ *data LoessSurface ; set LoessSurface; *get rid of points out the study area; *if p_date_ad_ ne .; data mayasites1; *set up files for annotating the plot; set maya; y=sn; x=we; FUNCTION='SYMBOL'; COLOR='BLACK'; size=.2; text='CIRCLE'; XSYS='2'; YSYS='2'; OUTPUT; data mayasites2; set maya; text=site; y=sn; x=we; function = 'LABEL'; COLOR='BLACK'; position='6'; style='SWISS'; size=.5; XSYS='2'; YSYS='2'; OUTPUT; data mayasites3; set mayasites1 mayasites2; proc gcontour data =LoessSurface; plot sn*we = p_date_ad_ / anno=mayasites3 levels= 750 to 900 by 10 llevels= 33 33 33 33 33 1 1 1 1 1; /*look at the residuals */ data statout; set statout; residual= depvar-pred; proc univariate data=statout; qqplot residual; run; /*contour map of the residuals */ proc g3grid data=statout out=quickanddirtysurface; grid sn*we = residual /join axis1=0 to 500 by 5 axis2=0 to 500 by 5 ; run; proc gcontour data = quickanddirtysurface; plot sn*we = residual / anno=mayasites3 nlevels= 10 llevels= 33 33 33 33 33 1 1 1 1 1; run; /* look at the variogram of residuals */ /*invoke Proc IML -- Interactive Matrix Language */ proc iml; use statout ; ******** supply dataset name; read all var{sn we} into c; ******** names of northing (y) and easting (x) vars; read all var{residual} into x; ******** name of z variable; nsteps= 10 ; ******** enter number of lags ; lag= 20 ; ******** enter lag size; n=nrow(x); distmat= j(n,n,0); gc=j(nsteps,1,0); lagd=gc; semivar=gc; *dimension vars; nedges=gc; moran=gc; diffmat=distmat; cpmat=distmat; meandist=gc; mean = (sum(x)/n); print mean; do j=1 to n by 1; *get a matrix of distances between data points; do k=1 to n by 1; *and matrices of squared differences and crossproducts; distmat[k,j]= ((c[k,]-c[j,])*(c[k,]-c[j,])`)##.5; diffmat[k,j]= (x[k,]-x[j,])##2; cpmat[k,j] = (x[k,]-mean)#(x[j,]-mean); end; end; z = x - ((sum(x))/n); *deviations from mean; ss=ssq(z); variance= ss/(n-1); print variance z distmat diffmat cpmat; do i=1 to nsteps; *compute for the i'th lag; lh=((i-1)#lag)+.001; hh=(i#lag)+.001; w= (lh < distmat) & (distmat <= hh); * binary weights matrix; lagd[i,]= i#lag; sumw= sum(w); meandist[i,]= (sum(w#distmat))/sumw; *average diatnce between point pairs; semivar[i,]= ((sum(w#diffmat))/sumw)*.5; *semivariance; gc[i,]= semivar[i,]/variance; *gearys c; moran[i,] = ((sum(w#cpmat))/sumw)/ (ss/n); nedges[i,]= sumw/2; print i w; end; result= semivar||gc|| moran || lagd|| meandist || nedges; label= {semivar gearys_c morans_i lagdist meandist nedges}; create autocorr from result [colname=label]; append from result ; data autocorr; set autocorr; logmeandist = log10(meandist); logsemivar=log10(semivar); proc print; proc gplot data=autocorr; plot gearys_c * meandist=1; plot semivar*meandist =1; plot morans_i*meandist =1; run;