/*Filename: SpatialBetaBinomial.sas */ /*Purpose: Empirical Bayes smoothing of quadrats counts */ /*Last Update: FDN 4.9.07 */ data coords; ******create the dataset with the quadrat coordinates and quadrat ID's; set site8counts; keep QuadratID meannorthing meaneasting; data smoothit; ********create the dataset with the quadrat counts and quadrat ID's; set set site8counts; drop meannorthing meaneasting; proc iml; *-------------------------------------------------------------------------------------; use smoothit; * smoothit contains the quadrat artifacts freqs; read all into x [colname=vars rowname=QuadratID]; use coords; * coords contains the quadrat northings and eastings; read all into c; range=40; *specify size of the spatial neighborhood; MinGoodQuads =5 ; *Minimum number of quadrats in a neighboorhood; *with the required sample size; minN = 5; *required sample size; *---------------------------------------------------------------------------------------; **************************************************************************************; start checkdistances(coords,x,QuadratID,range,minN,NumGoodQuadrats); * For the ith Quadrat, figure out how many quadrats lie with the specified RANGE and have ; * at least MinN artifacts. This is the number of quadrats that will be used to estimate ; * the beta parameters for the ith quadrat.; * arguments: MinN: the minimum sample size for beta estimation; * coords: the input spatial coordinates of all quadrats; * range: distance class past which weights are zero; * x: the full data matrix; * result: NumGoodQuadrats:a vector with the number of that can be used for estamtion; * for the ith quadrat; rows= nrow(coords); distcheck=j(rows,rows,0); do i=1 to rows by 1; do j=1 to rows by 1; distcheck[i,j]= ((( coords[i,]- coords[j,])*( coords[i,]- coords[j,])`)##{.5}) <= (range+{.001}); end; end; *print distcheck; samplesizecheck= repeat(x[,+]`,rows,1) >= minN ; *print samplesizecheck; check= samplesizecheck # distcheck; *print check; NumGoodQuadrats=(check[,+]); * number of quads with minsample artifacts and within range; create NumGoodQuadrats from NumGoodQuadrats [rowname=QuadratID]; append from NumGoodQuadrats [rowname=QuadratID]; close smoothx; finish checkdistances; **************************************************************************************; **************************************************************************************; start getnbrhd(index,x,coords,range,MinN,X_NBRHD); * get a matrix of artifact counts for all quads ; * within a given distance (range) of the indexed quad ; * arguments: index: the index of the ith quad ; * x: the full data matrix ; * coords: the input spatial coordinates of all quadrats ; * MinN Minimum sample size for a quad ; * range: distance class past which weights are zero ; * result: X_NBRHD: a matrix of artifact counts for the neighborhood, ; * sorted by distance from the indexed quad ; rows= nrow(coords); dist= j(rows,1,.); BigEnough=dist; do j=1 to rows; *get distances from the indexed quad to the other quads; dist [j,]= ((( coords[index,]- coords[j,])*( coords[index,]- coords[j,])`)##{.5}); BigEnough [j,]= (x[j,+] >= MinN); end; NumGoodQuads= sum(BigEnough); spatwts = (dist <= (range+.01)) & BigEnough; x_nbrhd= x[loc(spatwts),]; Nbrhd_distances = dist[loc(spatwts),]; *sort the vectors; Nbrhd_distances1=Nbrhd_distances; ranks=rank(Nbrhd_distances); Nbrhd_distances[ranks,]= Nbrhd_distances1; x_nbrhd1 = x_nbrhd; x_nbrhd[ranks,]=x_nbrhd1; finish getnbrhd; *****************************************************************************************; *****************************************************************************************; start betabinomial3(x,POSTX); * Emprircal-Bayes estimates of cell frequencies ; * Uses the weighted sample variance less the estimated sampling variance to ; * estimate the variance of the prior (see Martuzzi and Elliot 1996:1868), ; * when the estimated sampling variance does not exceed the actual weighted ; * sample variance. Uses the unadjusted weighted sample varaince otherwise ; *arguments: x: The observed freqencies in several samples ; *Result: Postx: The posterior counts ; *house keeping so we dont get tipped up by 0s; outx=j(1,ncol(x),0); oldx=x; nonzeroindex=loc(x[+,]); x=oldx[,nonzeroindex]; *end house keeping; nrows=nrow(x); colmarg= x[+,]; rowmarg= x[,+]; Nbar=sum(X)/nrows; Nmat = repeat(rowmarg,1,ncol(x)) ; pi= x / Nmat ; lambda = x[+,]/sum(x); *The overeall weighted mean of the p(i)s; lambdamat= repeat(lambda,nrows,1); wtdsqdevs = nmat # ((pi-lambdamat)##2); s2vec = wtdsqdevs[+,]/sum(x) ; s2vec1 = (1/nbar)#(lambda#(1-lambda)); test = sum(s2vec <= s2vec1); if test > 0 then do; index = loc(s2vec <= s2vec1); s2vec2= (s2vec-s2vec1)/(1-(1/nbar)); *Martuzzi and Elliott 1996 prior variance; s2vec2[,index] = s2vec[,index]; end; if test = 0 then s2vec2= (s2vec-s2vec1)/(1-(1/nbar)) ; a= lambda # (((lambda #(1-lambda ))/s2vec2)-1) ; b= (1-lambda ) # (((lambda #(1-lambda ))/s2vec2)-1) ; PRINT A B; *amat= repeat(a,nrows,1); *bmat= repeat(b,nrows,1); Postmean= (x[1,]+ a) /(rowmarg[1,]+a+b); print postmean; PostX = postmean # rowmarg[1,]; PRINT POSTx; *house keeping again; outx[,nonzeroindex]=postx; postx=outx; PRINT POSTX; finish betabinomial3; *****************************************************************************************; smoothx= j(nrow(x),ncol(x),.); *dimension a matrix for the results; call checkdistances(c,x,QuadratID,range,minN,NumGoodQuadrats); do index=1 to nrow(x); if NumGoodQuadrats[index,] >= MinGoodQuads then do; call getnbrhd(index,x,c,range,MinN,X_NBRHD); call betabinomial3(x_nbrhd,POSTXi); smoothx[index,]= PostXi; end; end; create smoothx from smoothx [rowname=QuadratID colname=vars ]; append from smoothx [rowname=QuadratID]; close smoothx; Title1 ' '; proc print data=smoothx; run; proc corresp data =smoothx out=ca short dimens=3; var _numeric_ ; id quadratid; *plot the results; proc sort; by _type_ inertia; data ca_obs; set ca; if _type_ eq 'OBS'; rank=_n_; goptions reset ftitle="verdana" ftext="verdana" htitle=3 htext=3; symbol1 color=black value=dot color=orange pointlabel =("#QuadratID" font="garamond" h=1.5); *plot the inertias; axis1 offset=(2,2) label=(angle=90 'Inertia') major=(height=3) minor=none width=2; axis2 offset=(2,2) label= ('Rank' ) major=(height=3) minor=none width=2; proc gplot; plot inertia*rank / haxis=axis2 vaxis=axis1; run; data ca_vars; set ca; if _type_ eq 'VAR'; rank= _n_; proc gplot; plot inertia*rank / haxis=axis2 vaxis=axis1; run; proc gplot data=ca_obs; plot sqcos2*sqcos1 sqcos3*sqcos1 ; run; *plot the scores; proc gplot data=ca_vars; plot sqcos2*sqcos1 sqcos3*sqcos1 ; run; proc gplot data=ca; by _type_; plot dim2*dim1 / haxis=axis2 vaxis=axis1; axis1 offset=(2,2) label=(angle=90 'Dimension 2') major=(height=2) minor=(height=1) width=2; axis2 offset=(2,2) label= ('Dimension 1' ) major=(height=2) minor=(height=1) width=2; run; proc gplot data=ca; by _type_; plot dim3*dim1 / haxis=axis2 vaxis=axis1; *plot the scores; axis1 offset=(2,2) label=(angle=90 'Dimension 3') major=(height=2) minor=(height=1) width=2; axis2 offset=(2,2) label= ('Dimension 1' ) major=(height=2) minor=(height=1) width=2; run; /* ******* This bit allows you to plot CA-based groups in space. But first you have to ID the groups on the basis of their CA scores; proc sort data= ca_obs; by quadratid; proc sort data= coords; by quadratid; data plotit; merge ca_obs coords; by quadratid; data plotit; merge ca_obs coords; by quadratid; if dim1 ne . then do; if dim1 > 1.2 then group= '1a'; if (dim2 < 0) and (dim1 > .45) and (dim1 <= 1.2) then group='1b'; if dim1 < .45 and dim1 > .15 and dim2 < .3 then group='1c'; if dim2 > .6 then group='2a'; if dim2 > .3 and dim2 < .6 then group= '2b'; if dim1 < .15 and dim1 > -.2 and dim2 < .3 then group ='3a'; if dim1 < -.2 and dim2 < .3 then group ='3b'; end; symbol1 color=black value=none color=orange pointlabel =("#group" font="garamond" h=1); proc gplot; plot meannorthing* meaneasting =group ; run; */