/* Filename: TimetechCorr.sas Purpose: SAS code for correlation analysis of the timetech dataset. First we get the summary stats for each assemblage Then we experiment with loess to hunt for trends. Last Update: FDN 3.30.02 */ title1 'Temporal Trends in Woodland Ceramics'; goptions reset=global gunit=in ftitle="garamond" ftext="garamond" htitle=.5 htext=.3; PROC IMPORT OUT= WORK.TIMETECH DATAFILE= "f:\courses\anth588\TimeTech.xls" DBMS=EXCEL2000 REPLACE; GETNAMES=YES; RUN; data timetech1; set timetech; logavth =log(avth); logmindiam =log(mindiam); SiteProv= site||prov; if caldate ne .; proc sort data=timetech1; by SiteProv; proc plot; by siteprov; plot logavth *logmindiam; run; data caldate; set timetech1; by siteProv; if first. SiteProv; keep SiteProv caldate; title2 'Pearson Correlations'; proc corr data=timetech1 outp=corr (rename=(logavth=corr)); by SiteProv; var logavth; with logmindiam; data corr; set corr; if _type_= 'N' or _type_='CORR'; proc transpose data=corr out=corr; by SiteProv; id _type_; var corr; data corr; merge Caldate corr; by siteprov; if n > 1; wcorr= 1/sqrt(n); proc sort; by caldate; /* now for the loess analysis */ proc loess data=corr; model corr=caldate / smooth=.4 degree=2 residual alpha=.05 clm direct; id wcorr; weight wcorr; ods output OutputStatistics=results; data results; set results; format _numeric_; symbol1 color=red value=dot height=.2; symbol2 color=black interpol=join height=2; symbol3 color=black interpol=join height=2; symbol4 color=black interpol=join height=2; proc gplot data=results; plot depvar*caldate pred*caldate lowercl*caldate uppercl*caldate/ vref=0 overlay; run; /*do another loess to the residuals */ proc loess data=RESULTS; model residual=caldate / smooth=.4 degree=1 direct residual alpha=.05 clm; weight wcorr; ods output OutputStatistics=residresults; data residresults; set residresults; format _numeric_; proc gplot data=residresults; plot depvar*caldate pred*caldate lowercl*caldate uppercl*caldate/ overlay; run; /*and NOW ..... drum roll... for a robust normal qqplot of the residuals*/ proc rank data=results out=qqplot; var residual; ranks rank; proc univariate data=qqplot ; var residual; output out=qqplot1 n=n mean=mean std=std median=median qrange=IQR; data qqplot; if _n_=1 then set qqplot1; set qqplot; FValues= (rank-.5)/n ; RobustSD= IQR/1.35; NormalQuantiles= probit(FValues); RobustLine= median + RobustSD*NormalQuantiles; QQResidual= residual-RobustLine; run; proc gplot data=qqplot; plot residual * NormalQuantiles RobustLine *NormalQuantiles / overlay; proc gplot data=qqplot; plot QQResidual*NormalQuantiles; run;