/*Filename: MayaTrendSurface.sas */ /*Purpose: Fit a series of trend surfaces to terminal monument dates, then check out */ /* the qq-plot, surface plot, and correlograms of residuals. */ /*Last Update: FDN 2.08.07 */ /*import the dataset */ PROC IMPORT OUT= WORK.maya DATAFILE= "H:\www\anth589b\mayadata.dbf" DBMS=DBF REPLACE; GETDELETED=NO; RUN; /*start by setting the graphics options --Note that names of windows fonts are given in quotes*/ goptions reset=global gunit=in ftitle="garamond" ftext="garamond" htitle=.3 htext=.2; symbol1 value= circle h=.2; /*print the data */ proc print data=maya; run; /*check out the date variable's distribution*/ prov univariate data=maya; var date_ad_; histogram date_ad_/ cfill=orange; qqplot date_ad_ /normal(mu=est sigma=est color=blue l=1 w=2); run; proc gplot; plot sn*we; run; proc g3grid data=maya out=quickanddirtysurface; grid sn*we = date_ad_ /join axis1=0 to 500 by 5 axis2=0 to 500 by 5 ; run; data mayasites1; set maya; y=sn; x=we; FUNCTION='SYMBOL'; COLOR='BLACK'; size=.2; text='CIRCLE'; XSYS='2'; YSYS='2'; OUTPUT; drop sn we; data mayasites2; set maya; text=site; y=sn; x=we; function = 'LABEL'; COLOR='BLACK'; position='6'; style='SWISS'; size=.5; XSYS='2'; YSYS='2'; OUTPUT; data mayasites3; set mayasites1 mayasites2; proc gcontour data =quickanddirtysurface; plot sn*we = date_ad_ / anno=mayasites3 nlevels= 10 llevels= 33 33 33 33 33 1 1 1 1 1; run; /* try a linear trend surface */ proc glm data=maya; model date_ad_ = sn we / solution; /*generate a grid of x-y coordinate paits and predict the z values at them */ data xygrid; do sn=0 to 500 by 5; do we=0 to 500 by 5; output; end; end; data trendsurface; set xygrid; zhat = 781.3479969 + 0.0454919*sn + 0.0444018*we; proc gcontour data =trendsurface; plot sn*we = zhat / anno=mayasites3 nlevels= 10 llevels= 33 33 33 33 33 1 1 1 1 1; /* try a quadratic trend surface */ proc glm data=maya; model date_ad_ = sn we sn*sn we*we sn*we / solution; output out=quadratic p=pred r=residual; run; data trendsurface2; set xygrid; zhat = 1171.968962 + -1.250578 * sn + -1.346289 * we + 0.000787 *sn*sn + 0.001148 *we*we + 0.002494 *sn*we; proc gcontour data =trendsurface2; plot sn*we = zhat / anno=mayasites3 nlevels= 10 llevels= 33 33 33 33 33 1 1 1 1 1; /* lets look at the residuals */ proc univariate data=quadratic; histogram residual/ cfill=orange; qqplot residual/normal(mu=est sigma=est color=blue l=1 w=2); run; /*contour map of the residuals */ proc g3grid data=quadratic out=quickanddirtysurface; grid sn*we = residual /join axis1=0 to 500 by 5 axis2=0 to 500 by 5 ; run; proc gcontour data = quickanddirtysurface; plot sn*we = residual / anno=mayasites3 nlevels= 10 llevels= 33 33 33 33 33 1 1 1 1 1; /*check the correlograms */ /*invoke Proc IML -- Interactive Matrix Language */ proc iml; use quadratic ; ******** supply dataset name; read all var{sn we} into c; ******** names of northing (y) and easting (x) vars; read all var{residual} into x; ******** name of z variable; nsteps= 10 ; ******** enter number of lags ; lag= 20 ; ******** enter lag size; n=nrow(x); distmat= j(n,n,0); gc=j(nsteps,1,0); lagd=gc; semivar=gc; *dimension vars; nedges=gc; moran=gc; diffmat=distmat; cpmat=distmat; meandist=gc; mean = (sum(x)/n); do j=1 to n by 1; *get a matrix of distances between data points; do k=1 to n by 1; *and matrices of squared differences and crossproducts; distmat[k,j]= ((c[k,]-c[j,])*(c[k,]-c[j,])`)##.5; diffmat[k,j]= (x[k,]-x[j,])##2; cpmat[k,j] = (x[k,]-mean)#(x[j,]-mean); end; end; z = x - ((sum(x))/n); *deviations from mean; ss=ssq(z); variance= ss/(n-1); print variance distmat diffmat cpmat; do i=1 to nsteps; *compute for the i'th lag; lh=((i-1)#lag)+.001; hh=(i#lag)+.001; w= (lh < distmat) & (distmat <= hh); * binary weights matrix; lagd[i,]= i#lag; sumw= sum(w); meandist[i,]= (sum(w#distmat))/sumw; *average diatnce between point pairs; semivar[i,]= ((sum(w#diffmat))/sumw)*.5; *semivariance; gc[i,]= semivar[i,]/variance; *gearys c; moran[i,] = ((sum(w#cpmat))/sumw)/ (ss/n); nedges[i,]= sumw/2; end; result= semivar||gc|| moran || lagd|| meandist || nedges; label= {semivar gearys_c morans_i lagdist meandist nedges}; create autocorr from result [colname=label]; append from result ; data autocorr; set autocorr; logmeandist = log10(meandist); logsemivar=log10(semivar); proc print; symbol1 value=dot h=.2 i=join ; *specify the symbol to be used in plots; proc gplot data=autocorr; plot gearys_c * meandist=1; plot semivar*meandist =1; plot morans_i*meandist =1; run; /* try a cubic trend surface */ proc glm data=maya; model date_ad_ = sn we sn*we sn*sn we*we sn*sn*we we*we*sn we*we*we sn*sn*sn / solution; output out=cubic p=pred r=residual; run; data trendsurface3; set xygrid; zhat = 1933.380513 + -6.455280 *sn + -4.983896 *we + 0.014631 *sn*we + 0.013736 *sn*sn + 0.008621 *we*we + -0.000013 *sn*sn*we + -0.000008 *we*we*sn + -0.000006 *we*we*we + -0.000011 *sn*sn* sn; proc gcontour data =trendsurface3; plot sn*we = zhat / anno=mayasites3 levels= 600 to 1100 by 50 llevels= 33 33 33 33 33 1 1 1 1 1; proc g3d data = trendsurface3; plot sn*we = zhat ; run; /* lets look at the residuals */ proc univariate data=quadratic; histogram residual/ cfill=orange; qqplot residual/normal(mu=est sigma=est color=blue l=1 w=2); run; /*contour map of the residuals */ proc g3grid data=cubic out=quickanddirtysurface; grid sn*we = residual /join axis1=0 to 500 by 5 axis2=0 to 500 by 5 ; run; proc gcontour data = quickanddirtysurface; plot sn*we = residual / anno=mayasites3 nlevels= 10 llevels= 33 33 33 33 33 1 1 1 1 1; run; /*check the correlograms */ /*invoke Proc IML -- Interactive Matrix Language */ proc iml; use cubic ; ******** supply dataset name; read all var{sn we} into c; ******** names of northing (y) and easting (x) vars; read all var{residual} into x; ******** name of z variable; nsteps= 10 ; ******** enter number of lags ; lag= 20 ; ******** enter lag size; n=nrow(x); distmat= j(n,n,0); gc=j(nsteps,1,0); lagd=gc; semivar=gc; *dimension vars; nedges=gc; moran=gc; diffmat=distmat; cpmat=distmat; meandist=gc; mean = (sum(x)/n); do j=1 to n by 1; *get a matrix of distances between data points; do k=1 to n by 1; *and matrices of squared differences and crossproducts; distmat[k,j]= ((c[k,]-c[j,])*(c[k,]-c[j,])`)##.5; diffmat[k,j]= (x[k,]-x[j,])##2; cpmat[k,j] = (x[k,]-mean)#(x[j,]-mean); end; end; z = x - ((sum(x))/n); *deviations from mean; ss=ssq(z); variance= ss/(n-1); print variance distmat diffmat cpmat; do i=1 to nsteps; *compute for the i'th lag; lh=((i-1)#lag)+.001; hh=(i#lag)+.001; w= (lh < distmat) & (distmat <= hh); * binary weights matrix; lagd[i,]= i#lag; sumw= sum(w); meandist[i,]= (sum(w#distmat))/sumw; *average diatnce between point pairs; semivar[i,]= ((sum(w#diffmat))/sumw)*.5; *semivariance; gc[i,]= semivar[i,]/variance; *gearys c; moran[i,] = ((sum(w#cpmat))/sumw)/ (ss/n); nedges[i,]= sumw/2; end; result= semivar||gc|| moran || lagd|| meandist || nedges; label= {semivar gearys_c morans_i lagdist meandist nedges}; create autocorr from result [colname=label]; append from result ; data autocorr; set autocorr; logmeandist = log10(meandist); logsemivar=log10(semivar); proc print; symbol1 value=dot h=.2 i=join ; *specify the symbol to be used in plots; proc gplot data=autocorr; plot gearys_c * meandist=1; plot semivar*meandist =1; plot morans_i*meandist =1; run; /* try a quartic trend surface */ proc glm data=maya; model date_ad_= sn we sn*we sn*sn we*we sn*sn*we we*we*sn we*we*we sn*sn*sn sn*sn*sn*we we*we*we*sn sn*sn*we*we we*we*we*we sn*sn*sn*sn / solution; output out=quartic p=pred r=residual; run; data trendsurface3; set xygrid; zhat = + 1881.999910 -18.523312 *sn + 5.513020 *we + 0.040491 *sn*we + -0.06212 *sn*sn + -0.062125 *we*we + -0.000145 *sn*sn*we + 0.000017 *we*we*sn + 0.000152 *we*we*we + -0.000160 *sn*sn*sn + *sn*sn*sn*we + *we*we*we*sn + *sn*sn*we*we + *we*we*we*we + *sn*sn*sn*sn ; proc gcontour data =trendsurface3; plot sn*we = zhat / anno=mayasites3 levels= 600 to 1100 by 50 llevels= 33 33 33 33 33 1 1 1 1 1; proc g3d data = trendsurface3; plot sn*we = zhat ; run; /* lets look at the residuals */ proc univariate data=quadratic; histogram residual/ cfill=orange; qqplot residual/normal(mu=est sigma=est color=blue l=1 w=2); run; /*contour map of the residuals */ proc g3grid data=cubic out=quickanddirtysurface; grid sn*we = residual /join axis1=0 to 500 by 5 axis2=0 to 500 by 5 ; run; proc gcontour data = quickanddirtysurface; plot sn*we = residual / anno=mayasites3 nlevels= 10 llevels= 33 33 33 33 33 1 1 1 1 1; run; /*check the correlograms */ /*invoke Proc IML -- Interactive Matrix Language */ proc iml; use cubic ; ******** supply dataset name; read all var{sn we} into c; ******** names of northing (y) and easting (x) vars; read all var{residual} into x; ******** name of z variable; nsteps= 10 ; ******** enter number of lags ; lag= 20 ; ******** enter lag size; n=nrow(x); distmat= j(n,n,0); gc=j(nsteps,1,0); lagd=gc; semivar=gc; *dimension vars; nedges=gc; moran=gc; diffmat=distmat; cpmat=distmat; meandist=gc; mean = (sum(x)/n); do j=1 to n by 1; *get a matrix of distances between data points; do k=1 to n by 1; *and matrices of squared differences and crossproducts; distmat[k,j]= ((c[k,]-c[j,])*(c[k,]-c[j,])`)##.5; diffmat[k,j]= (x[k,]-x[j,])##2; cpmat[k,j] = (x[k,]-mean)#(x[j,]-mean); end; end; z = x - ((sum(x))/n); *deviations from mean; ss=ssq(z); variance= ss/(n-1); print variance distmat diffmat cpmat; do i=1 to nsteps; *compute for the i'th lag; lh=((i-1)#lag)+.001; hh=(i#lag)+.001; w= (lh < distmat) & (distmat <= hh); * binary weights matrix; lagd[i,]= i#lag; sumw= sum(w); meandist[i,]= (sum(w#distmat))/sumw; *average diatnce between point pairs; semivar[i,]= ((sum(w#diffmat))/sumw)*.5; *semivariance; gc[i,]= semivar[i,]/variance; *gearys c; moran[i,] = ((sum(w#cpmat))/sumw)/ (ss/n); nedges[i,]= sumw/2; end; result= semivar||gc|| moran || lagd|| meandist || nedges; label= {semivar gearys_c morans_i lagdist meandist nedges}; create autocorr from result [colname=label]; append from result ; data autocorr; set autocorr; logmeandist = log10(meandist); logsemivar=log10(semivar); proc print; symbol1 value=dot h=.2 i=join ; *specify the symbol to be used in plots; proc gplot data=autocorr; plot gearys_c * meandist=1; plot semivar*meandist =1; plot morans_i*meandist =1; run;