/* Filename: TimetechReg.sas Purpose: SAS code for regression analysis of the timetech dataset. First we get the summary stats for each assemblage, including an inverse variance weight. Then we experiment with linear regression and and loess Last Update: FDN 3.17.02 */ title1 'Temporal Trends in Woodland Ceramics'; goptions reset=global gunit=in ftitle="garamond" ftext="garamond" htitle=.5 htext=.3; PROC IMPORT OUT= WORK.TIMETECH DATAFILE= "f:\courses\anth588\TimeTech.xls" DBMS=EXCEL2000 REPLACE; GETNAMES=YES; RUN; data timetech1; set timetech; logavth =log(avth); SiteProv= site||prov; if caldate ne .; /*let's get the summary stats that we need */ proc means data=timetech1 noprint nway; class siteprov; id caldate CalStan; *this tells SAS to include CalDate and Calstan in the output dataset; var avth mindiam; output out=sumstats n(avth mindiam) = navth nmindiam mean(avth logavth mindiam) = mavth mlogavth mmindiam cv(avth mindiam) = cvavth cvmindiam stderr(avth logavth mindiam) = seavth selogavth semindiam; /* we now compute the weights for each assemblage and variable -- inverses of the squared SE*/ data sumstats; set sumstats; if navth > 1; * get rid of single-sherd samples; wlogavth = 1/(selogavth**2); *compute weights -- the inverse of the sampling variance; wavth = 1/(seavth**2); wmindiam = 1/(semindiam**2); proc means data=sumstats; var navth nmindiam; output out=totals sum (wlogavth wavth wmindiam navth nmindiam) = totalwlogavth totalwavth totalwmondiam totalavth totalmindiam; proc print; run; /* this forces the weights to add up to the total sample size for all the data */ data sumstats; if _n_=1 then set totals; set sumstats; wlogavth= (wlogavth/totalwlogavth)*totalavth; /*we need the sort so the CLs on the plots look OK */ proc sort; by caldate; proc glm data=sumstats; weight wlogavth; model mlogavth=caldate; output out=regresults p=predicted r=residual uclm=MeanUpperCL lclm=MeanLowerCL; symbol1 color=red value=dot height=.2; symbol2 color=black interpol=join height=2; symbol3 color=black interpol=join height=2; symbol4 color=black interpol=join height=2; proc gplot data=regresults; plot residual*predicted; run; proc gplot data=regresults; plot mlogavth*caldate predicted*caldate MeanUpperCL*caldate MeanLowerCL*caldate/overlay; run; /*and NOW ..... drum roll... for a robust normal qqplot of the residuals*/ proc rank data=regresults out=qqplot; var residual; ranks rank; proc univariate data=qqplot ; var residual; output out=qqplot1 n=n mean=mean std=std median=median qrange=IQR; data qqplot; if _n_=1 then set qqplot1; set qqplot; FValues= (rank-.5)/n ; RobustSD= IQR/1.35; NormalQuantiles= probit(FValues); RobustLine= median + RobustSD*NormalQuantiles; QQResidual= residual-RobustLine; run; proc gplot data=qqplot; plot residual * NormalQuantiles RobustLine *NormalQuantiles / overlay; proc gplot data=qqplot; plot QQResidual*NormalQuantiles; run; proc loess data=sumstats; model mlogavth=caldate / smooth=.4 degree=1 direct residual alpha=.05 clm; id wlogavth; weight wlogavth; ods output OutputStatistics=results; symbol1 color=red value=dot height=.2; symbol2 color=black interpol=join height=2; data results; set results; format _numeric_; proc gplot data=results; plot depvar*caldate pred*caldate lowercl*caldate uppercl*caldate / overlay haxis= (900 to 2300 by 100); run; /*do another loess to the residuals */ proc loess data=RESULTS; model residual=caldate / smooth=.2 degree=1 direct residual alpha=.05 clm; weight wlogavth; ods output OutputStatistics=residresults; /*some housekeeping for the plot*/ data residresults; set residresults; format _numeric_; proc gplot data=residresults; plot depvar*caldate pred*caldate lowercl*caldate uppercl*caldate/ overlay; run; /*and NOW ..... drum roll... for a robust normal qqplot of the residuals*/ proc rank data=results out=qqplot; var residual; ranks rank; proc univariate data=qqplot ; var residual; output out=qqplot1 n=n mean=mean std=std median=median qrange=IQR; data qqplot; if _n_=1 then set qqplot1; set qqplot; FValues= (rank-.5)/n ; RobustSD= IQR/1.35; NormalQuantiles= probit(FValues); RobustLine= median + RobustSD*NormalQuantiles; QQResidual= residual-RobustLine; run; proc gplot data=qqplot; plot residual * NormalQuantiles RobustLine *NormalQuantiles / overlay; proc gplot data=qqplot; plot QQResidual*NormalQuantiles; run;