/* Filename: Bootdiversity.sas Purpose: SAS code to compute diversity statistics AND bootstrap estimates of means and 95% confidence limits for samples of the smallest size observed. Conkey's data on stylistic element frequencies in 5 Magdalenian bone assemblages are use as an example (data from Kaufman 1998, Am. Antiquity 63:73-85) Last Update: FDN 4.16.02 */ data conkey; input element $ Altamira LaMina ElJuyo LaCierro Paloma; cards; 1 2 1 0 0 0 2 12 12 8 5 4 3 7 2 2 1 0 4 1 0 1 2 0 5 0 1 0 0 0 6 3 0 0 0 0 7 12 0 0 0 0 8 15 3 12 7 1 9 0 1 3 3 2 10 3 5 9 2 2 11 1 0 0 1 0 12 1 1 0 0 0 13 12 4 2 4 3 14 7 3 1 0 0 15 3 1 1 2 0 16 11 0 0 0 0 17 3 0 0 0 0 18 1 1 1 0 1 19 7 2 1 2 0 20 2 4 0 0 0 21 4 0 1 0 0 22 3 1 0 0 0 23 3 1 2 1 0 24 1 0 0 0 0 25 5 1 1 0 1 26 1 0 0 0 0 27 1 0 1 0 0 28 1 2 1 0 0 29 0 2 0 0 0 30 2 0 0 0 0 31 0 1 0 0 0 32 1 0 0 0 0 33 0 7 0 0 0 34 1 0 0 1 0 35 1 0 0 0 0 36 1 0 0 0 0 37 3 0 0 0 0 38 0 1 0 0 0 39 2 1 1 1 1 40 1 2 0 0 0 41 4 2 2 0 1 42 5 6 0 2 4 43 4 1 3 1 1 44 5 0 0 0 2 ; proc transpose data=conkey out=conkey1 name=site; id element; run; proc sort; by site; proc print; run; proc iml; /* first define the functions that we will need */ ************************************************************************** Function: RESAMPLE Description:Samples with replacement from an vector of counts. Arguments: X: a row vector of counts e.g. type frequencies. samplesize: size of the sample to be taken. Returns: a row vector of counts sampled from a population with type frequencies given by X.; start resample(x,samplesize); xout= j(1,ncol(x),0); CUMpctvec=cusum(x)/sum(x); pct= x/sum(x); ntype=ncol(x); do i = 1 to samplesize; rannum=uniform(0); do j=1 to ncol(x); if j=1 then do; if (rannum < CUMpctvec[,j]) then xout[,j]=xout[,j]+1; end; if j ^= 1 then do; if ((rannum >= cumpctvec[,j-1]) & (rannum< cumpctvec[,j])) then xout[,j]=xout[,j]+1; end; end; end; return(xout); finish resample; **************************************************************************; ********************************************************************************** Function: diversity Description: Computes diversity statistics from an input assmblage*type matrix Arguments: INMAT: an assemblages (rows) * types (cols) data matrix Returns: OUTMAT: a matrix with the following columns: h_shannon: a column vector of shannon diversity e_shannon: a column vector of shannon evenness d_simpson: a column vector of simpson diversity e_simpson: a column vector of simpson evenness t_f: a column vector of t_f estimates (Neiman 1995) n_types: a column vector of Number of types (richness) in each sample total: a column vector of Sample sizes; start diversity(INMAT); nobs=nrow(inmat); nvars=ncol(inmat); sums=j(nobs,nvars,0); * dimension ; h_shannon=j(nobs,1,0); * dimension a vector for shannon ; total=inmat[,+]; * sample sizes; n_types=(inmat>0)[,+]; * number of types (species, alleles); do i=1 to nvars; sums[,i]=total; end; prop=inmat/sums; *proportions; f=(prop#prop)[,+]; *isonymy; d_simpson=1/f ; *effective number of alleles, Simpson's D; e_simpson = d_simpson#(1/n_types); * evenness for simpson's D; t_f= d_simpson-1; * estimate of theta_f (Neiman 1995) ; do i=1 to nobs; x= prop[i,loc(prop[i,])]; *error trap for division by 0; h_shannon[i,]=-(sum(x#log(x))); *shannon's H; end; e_shannon= h_shannon/log(total); *evenness for shannon's H; outmat = h_shannon || e_shannon || d_simpson || e_simpson || t_f || n_types || total; return (outmat); finish diversity; ****************************************************************************************; **************************************************************************************** Function: BootDivSamples Description: Draws bootstrap samples with replacement from each assemblage (row) in an input data matrix. The size for each sample is the minimumobserved sample size. Arguments: Inmat: an assemblages (rows) * types (cols) data matrix. ActualDivSats: a matrix of observed divesrity stats for the assemblages. Rowlabel: A character matrix giving the names of the assemblages (e.g. sites). NumberBootSamples: Number of Bootstrapped samples to draw from each assemblage -- Use at least 1000. Returns: BOOTSAMPLES: the matrix of bootstrapped samples. BOOTROWLABEL: a char matrix of rowlabels for the bootstrapped samples-- which site are they from?; start BootDivSamples1(Inmat,ActualDivstats,Rowlabel,NumberBootSamples,BOOTSAMPLES,BOOTROWLABEL); iterations= (nrow(inmat)#NumberBootSamples); BootSamples = j(iterations,ncol(Inmat),0); *set up a matrix to store the results; BootRowLabel = cshape(' ',iterations,1,16); * a col vector to store the labels; SampleSize= ActualDivStats[><,7]; iteration = 0; *an index to place the results in the right row; do i= 1 to nrow(InMat); do j=1 to NumberBootSamples; *draw NumberBootSamples bootstrap samples at each sample size; iteration = iteration+1; bootsamples[iteration,] = resample(inmat[i,],SampleSize); BootRowLabel[iteration,] = rowlabel[i,]; end; end; finish BootDivSamples1; ******************************************************************************************************; /* read the data and do the business*/ use conkey1; read all into inmat ; *read in the numeric data; read all var {site} into rowlabel; *read any character variables for row labels; /* compute the actualdiversity stats and output them to a SAS ds. */ ActualDivStats = diversity(INMAT); collabel = {'h_shannon' 'e_shannon' 'd_simpson' 'e_simpson' 't_f' 'n_types' 'total'}; create ActualDivStats from ActualDivStats [rowname=rowlabel colname=collabel ]; append from ActualDivStats [rowname=rowlabel]; /* call the bootstrap sampling module and output the results*/ CALL BootDivSamples1(Inmat,ActualDivstats,Rowlabel,1000,BOOTSAMPLES,BOOTROWLABEL); BootDivStats = diversity(BOOTSAMPLES); create BootDivStats from BootDivStats [rowname=bootrowlabel colname=collabel ]; append from BootDivStats [rowname=bootrowlabel ]; /* Use univariate to summarize the results, then plot them. Note you need to spefify which diversity measure you want summarized and change the code as notted below */ title1 'Simpson''s D'; title2 ' Boostrapped Means and CLs '; goptions reset=global gunit=in ftitle="garamond" ftext="garamond" htitle=.5 htext=.3; proc univariate data =BootDivStats ; class bootrowlabel; var d_simpson; *choose the diversity measure you want plotted; histogram /intertile=1 cfill=cyan nrows=5; *force all the histograms on a single page; output out = d_simpson mean=mean pctlpre=P_ pctlpts=2.5 97.5 ; *name the output dataset; run; proc print; /* plot the means and 95% CLs */ proc transpose out = divplot(rename=(col1=MeanandCL)) name=statistic ; by bootrowlabel; var _numeric_; symbol1 interpol=hiloc width=2; axis1 offset=(1,1) label= ('Site'); *better labels fo the plot; axis2 label= ('Simpson''s D') ; *name of the measaure ; proc gplot; plot MeanandCL*BootrowLabel = 1/ haxis=axis1 vaxis=axis2; run;