/*Filename: BetaBinomial.sas */ /*Purpose: Empirical-Bayes estimation of binomial proportions from multiple samples */ /* Choice of threee methods, each with a different way of estimating the variance */ /* of the beta prior. */ /*Last Update: FDN 3.29.07 */ options ls=120; data braun; set braun1; id=compress(group||tclass); if group='LIV'; proc print; run; proc iml; use braun; ******** name of input dataset; read all into x [colname=vars rowname=id]; ******** specify an variable that Ids the rows (rowname=); method = 1 ; ******** Choose the method: ; * 1) variance of the prior is estimated as the weighted ; * variance of the samples from the grand mean ; * 2) variance of the prior is estimated as the weighted ; * variance of the samples from the grand mean, minus ; * the estimated sampling variance. If negative variance ; * estimates result, a warning is printed and no estimates; * are made ; * 3) Uses estimates from 2), where prior varaince estimates ; * are positive, 1) where they are negative ; *****************************************************************************************; start betabinomial1(x,POSTMEAN,POSTX,POSTVAR); *Empirical-Bayes estimates of cell frequencies a la Iversen 1984 and Robertson 1999; *Uses the unadjusted weighted sample varinace to estimate the varaince of the prior; *arguments: x: The observed freqencies in several samples ; *Result: Postx: The posterior counts ; * Postmean: the posterior proportions ; * Postvar: The posterior variance ; *house keeping so we dont get tipped up by 0s; outx1=j(nrow(x),ncol(x),0); outx2=outx1; outx3=outx1; oldx=x; nonzeroindex=loc(x[+,]); x=oldx[,nonzeroindex]; *end house keeping; nrows=nrow(x); colmarg= x[+,]; rowmarg= x[,+]; Nbar=sum(X)/nrows; Nmat = repeat(rowmarg,1,ncol(x)) ; pi= x / Nmat ; lambda = x[+,]/sum(x); *The overeall weighted mean of the p(i)s; lambdamat= repeat(lambda,nrows,1); wtdsqdevs = nmat # ((pi-lambdamat)##2); s2vec = wtdsqdevs[+,]/sum(x) ; a= lambda # (((lambda #(1-lambda ))/s2vec)-1) ; b= (1-lambda ) # (((lambda #(1-lambda ))/s2vec)-1) ; print a b; amat= repeat(a,nrows,1); bmat= repeat(b,nrows,1); Postmean= (x+amat)/ (nmat+amat+bmat); Postvar = (postmean#(1-postmean)) /(x+amat+bmat+1); PostX = postmean # nmat; *house keeping again; outx1[,nonzeroindex]=postmean; postmean=outx1; outx2[,nonzeroindex]=postx; postx=outx2; outx3[,nonzeroindex]=postvar; postvar=outx3; finish betabinomial1; *****************************************************************************************; *****************************************************************************************; start betabinomial2(x,POSTMEAN,POSTX,POSTVAR); * Empirical-Bayes estimates of cell frequencies. ; * Uses the weighted sample variance less the estimated sampling variance to ; * estimate the variance of the prior (see Martuzzi and Elliot 1996:1868) ; *arguments: x: The observed freqencies in several samples ; *Result: Postx: The posterior counts ; * Postmean: the posterior proportions ; * Postvar: The posterior variance ; *house keeping so we dont get tipped up by 0s; outx1=j(nrow(x),ncol(x),0); outx2=outx1; oldx=x; nonzeroindex=loc(x[+,]); x=oldx[,nonzeroindex]; *end house keeping; nrows=nrow(x); colmarg= x[+,]; rowmarg= x[,+]; Nbar=sum(X)/nrows; Nmat = repeat(rowmarg,1,ncol(x)) ; pi= x / Nmat ; lambda = x[+,]/sum(x); *The overeall weighted mean of the p(i)s; lambdamat= repeat(lambda,nrows,1); wtdsqdevs = nmat # ((pi-lambdamat)##2); s2vec = wtdsqdevs[+,]/sum(x) ; s2vec1 = (1/nbar)#(lambda#(1-lambda)); if sum(s2vec <= s2vec1) > 0 then print 'Estimated sampling variance is is greater than observed weighted sample variance. Prior cannot be estimated. Use Method2 1 or 3.'; if sum(s2vec <= s2vec1) > 0 then stop; s2vec2= (s2vec-s2vec1)/(1-(1/nbar)); *Martuzzi and Elliott 1996 prior variance; a= lambda # (((lambda #(1-lambda ))/s2vec2)-1) ; b= (1-lambda ) # (((lambda #(1-lambda ))/s2vec2)-1) ; print a b; amat= repeat(a,nrows,1); bmat= repeat(b,nrows,1); Postmean= (x+amat)/ (nmat+amat+bmat); print postmean; Postvar = (postmean#(1-postmean)) /(x+amat+bmat+1); PostX = postmean # nmat; *house keeping again; outx1[,nonzeroindex]=postmean; postmean=outx1; outx2[,nonzeroindex]=postx; postx=outx2; outx3[,nonzeroindex]=postvar; postvar=outx3; finish betabinomial2; *****************************************************************************************; *****************************************************************************************; start betabinomial3(x,POSTMEAN,POSTX,POSTVAR); * Emprircal-Bayes estimates of cell frequencies ; * Uses the weighted sample variance less the estimated sampling variance to ; * estimate the variance of the prior (see Martuzzi and Elliot 1996:1868), ; * when the estimated sampling variance does not exceed the actual weighted ; * sample variance. Uses the unadjusted weighted sample varaince otherwise ; *arguments: x: The observed freqencies in several samples ; *Result: Postx: The posterior counts ; * Postmean: the posterior proportions ; * Postvar: The posterior variance ; *house keeping so we dont get tipped up by 0s; outx1=j(nrow(x),ncol(x),0); outx2=outx1; outx3=outx1; oldx=x; nonzeroindex=loc(x[+,]); x=oldx[,nonzeroindex]; *end house keeping; nrows=nrow(x); colmarg= x[+,]; rowmarg= x[,+]; Nbar=sum(X)/nrows; Nmat = repeat(rowmarg,1,ncol(x)) ; pi= x / Nmat ; lambda = x[+,]/sum(x); *The overeall weighted mean of the p(i)s; lambdamat= repeat(lambda,nrows,1); wtdsqdevs = nmat # ((pi-lambdamat)##2); s2vec = wtdsqdevs[+,]/sum(x) ; s2vec1 = (1/nbar)#(lambda#(1-lambda)); test = sum(s2vec <= s2vec1); if test > 0 then do; index = loc(s2vec <= s2vec1); s2vec2= (s2vec-s2vec1)/(1-(1/nbar)); *Martuzzi and Elliott 1996 prior variance; s2vec2[,index] = s2vec[,index]; end; a= lambda # (((lambda #(1-lambda ))/s2vec2)-1) ; b= (1-lambda ) # (((lambda #(1-lambda ))/s2vec2)-1) ; print a b; amat= repeat(a,nrows,1); bmat= repeat(b,nrows,1); Postmean= (x+amat)/ (nmat+amat+bmat); print postmean; Postvar = (postmean#(1-postmean)) /(x+amat+bmat+1); PostX = postmean # nmat; print postx; *house keeping again; outx1[,nonzeroindex]=postmean; postmean=outx1; outx2[,nonzeroindex]=postx; postx=outx2; outx3[,nonzeroindex]=postvar; postvar=outx3; finish betabinomial3; *****************************************************************************************; if method =1 then do; call BetaBinomial1(x,POSTMEAN,POSTX,POSTVAR); end; if method =2 then do; call BetaBinomial2(x,POSTMEAN,POSTX,POSTVAR); end; if method =3 then do; call BetaBinomial3(x,POSTMEAN,POSTX,POSTVAR); end; prefix = j(1,ncol(postx),'b_'); bestvarnames= concat(prefix,vars); create postx from postx [colname=bestvarnames rowname=id]; append from postx [ rowname=id]; create postmean from postmean [colname=bestvarnames rowname=id]; append from postmean [ rowname=id]; create postvar from postvar [colname=bestvarnames rowname=id]; append from postvar [ rowname=id]; run; proc print data=postx; title 'The posterior counts'; proc print data=postmean; title 'The posterior means'; proc print data=postvar; title'The posterior variances'; run;