/*
    Filename:    Peru.sas
    Purpose:     SAS code for regression analysis of the Peru dataset
                 from Schreiber and Kintigh.
    Last Update: FDN 3.2.02

    Outline:
      1. Import the Excel workbook into WORK.peru.
      2. Inspect the raw distributions of size and population.
      3. Log-transform both variables and re-inspect.
      4. Regress log(population) on log(size) for sitetype = 'village'.
      5. Plot residual diagnostics and the fit with its confidence bands.

    The workbook is expected to supply the columns size, population and
    sitetype, which are the variables referenced below.
*/

/* Location of the input workbook; change this one line to run elsewhere. */
%let peru_xls = F:\Courses\anth588\PeruSiteSize.xls;

/* RESET=GLOBAL cancels every TITLE, FOOTNOTE and SYMBOL definition, so it
   must come BEFORE title1; otherwise the title is silently wiped out. */
goptions reset=global gunit=in
         ftitle="garamond" ftext="garamond"
         htitle=.5 htext=.3;

title1 'Site Size and Population in Peru';

proc import out=WORK.peru
            datafile="&peru_xls"
            dbms=EXCEL2000 replace;
    getnames=yes;
run;

proc print data=peru;
run;

/* What do the distributions look like? */
symbol1 color=blue value=dot height=.2;

proc univariate data=peru;
    var size population;
    qqplot size population / normal(mu=est sigma=est);
run;

proc gplot data=peru;
    plot population*size;
run;
quit;

/* Let's do some transformations. */
data peru;
    set peru;
    /* Guard the logs: a non-positive (or missing) value leaves the result
       missing, as before, but without invalid-argument notes in the log. */
    if population > 0 then logpop = log(population);
    if size > 0 then logsize = log(size);
    label logpop  = 'log(Population)'
          logsize = 'log(Size)';
run;

/* Check to see if things are better. */
proc univariate data=peru;
    var logsize logpop;
    qqplot logsize logpop / normal(mu=est sigma=est);
run;

/* One symbol per sitetype level in the grouped scatter plot. */
symbol1 color=red  value=dot height=.2;
symbol2 color=blue value=dot height=.2;

proc gplot data=peru;
    plot logpop*logsize=sitetype;
run;
quit;

/* OK, let's do some regressions -- we'll use GLM. */
title2 "Regression Analysis";

proc glm data=peru;
    where sitetype eq 'village';
    model logpop = logsize;
    /* Save fitted values, residuals, and both sets of 95% limits:
       ucl/lcl bound an individual prediction, uclm/lclm bound the mean. */
    output out=regresults
           p=predicted r=residual
           ucl=PredictionUpperCL lcl=PredictionLowerCL
           uclm=MeanUpperCL      lclm=MeanLowerCL;
run;
quit;

/* Symbols for the overlay plot, in PLOT-request order:
     1   observed points             (red dots)
     2   fitted line                 (black)
     3-4 prediction limits           (black)
     5-6 confidence limits for mean  (blue)
   value=none suppresses the default PLUS marker; width= (not height=)
   is what controls the thickness of a joined line. */
symbol1 color=red   value=dot  height=.2;
symbol2 color=black value=none interpol=join width=2;
symbol3 color=black value=none interpol=join width=2;
symbol4 color=black value=none interpol=join width=2;
symbol5 color=blue  value=none interpol=join width=2;
symbol6 color=blue  value=none interpol=join width=2;

/* interpol=join connects points in data order, so sort by the x variable. */
proc sort data=regresults;
    by logsize;
run;

/* Plot the results -- including the residuals! */
proc gplot data=regresults;
    plot residual*predicted;
run;
quit;

proc univariate data=regresults;
    var residual;
    qqplot residual / normal(mu=est sigma=est);
run;

proc gplot data=regresults;
    plot logpop*logsize
         predicted*logsize
         PredictionUpperCL*logsize
         PredictionLowerCL*logsize
         MeanUpperCL*logsize
         MeanLowerCL*logsize / overlay;
run;
quit;