/* Filename: Diversity.sas Purpose: SAS code to compute diversity statistics. Conkey's data on stylistic element frequencies in 5 Magdalenian bone assemblages are use as an example (data from Kaufman 1998, Am. Antiquity 63:73-85) Last Update: FDN 4.15.02 */ data conkey; input element $ Altamira LaMina ElJuyo LaCierro Paloma; cards; 1 2 1 0 0 0 2 12 12 8 5 4 3 7 2 2 1 0 4 1 0 1 2 0 5 0 1 0 0 0 6 3 0 0 0 0 7 12 0 0 0 0 8 15 3 12 7 1 9 0 1 3 3 2 10 3 5 9 2 2 11 1 0 0 1 0 12 1 1 0 0 0 13 12 4 2 4 3 14 7 3 1 0 0 15 3 1 1 2 0 16 11 0 0 0 0 17 3 0 0 0 0 18 1 1 1 0 1 19 7 2 1 2 0 20 2 4 0 0 0 21 4 0 1 0 0 22 3 1 0 0 0 23 3 1 2 1 0 24 1 0 0 0 0 25 5 1 1 0 1 26 1 0 0 0 0 27 1 0 1 0 0 28 1 2 1 0 0 29 0 2 0 0 0 30 2 0 0 0 0 31 0 1 0 0 0 32 1 0 0 0 0 33 0 7 0 0 0 34 1 0 0 1 0 35 1 0 0 0 0 36 1 0 0 0 0 37 3 0 0 0 0 38 0 1 0 0 0 39 2 1 1 1 1 40 1 2 0 0 0 41 4 2 2 0 1 42 5 6 0 2 4 43 4 1 3 1 1 44 5 0 0 0 2 ; proc transpose data=conkey out=conkey1(drop=_label_) name=site; id element; proc sort; by site; proc print; run; /* use IML to compute the diversity stats */ proc iml; ************************************************************************** Function: theta_solve Description:iterative estimation of theta -- t_e (Ewens 1972, Neiman 1995) Arguments: n: Sample size s: Number of types Returns: est: a single theta estimate -- t_e; start theta_solve(s,n); maxiter=1000 ; *maximum number of iterations; res= .0001 ; *convergence criterion; valuevec=j(maxiter,1,0); target=s; est=1; inc=.1; valuevec[1,]= sum((est#j(1,n,1))/(est+(0:(n-1)))); /*equation to solve*/ i=2; do until ((i>maxiter) | abs((valuevec[i-1,]-target))0)^= ((valuevec[i,]-target)>0) then inc= -(inc/2); /*has an over shoot occurred?*/ est=est+inc; i=i+1; end; if abs((valuevec[i-1,]-target))> res then est=.; return(est); finish theta_solve; ********************************************************************************; ******************************************************************************** Function: theta_vec Description: Turns function THETA_SOLVE loose on vectors of aguments and returns a vector of estimates Arguments: Total: a column vector of Sample sizes n_types: a column vector of Number of types (richness) in each sample Requires: Function theta_solve Returns: Theta_EST_VEC: a Vector of theta estimates; start theta_vec(n_types,total); theta_est_vec=j(nrow(n_types),1,0); do i=1 to nrow(n_types) by 1; theta_est_vec[i,]= theta_solve(n_types[i,],total[i,]); end; return (theta_est_vec); finish theta_vec; *********************************************************************************; ********************************************************************************** Function: diversity Description: Computes diversity statistics from an input assmblage*type matrix Arguments: Returns: h_shannon: a column vector of shannon diversity e_shannon: a column vector of shannon evenness d_simpson: a column vector of simpson diversity e_simpson: a column vector of simpson evenness t_f: a column vector of t_f estimates (Neiman 1995) total: a column vector of Sample sizes n_types: a column vector of Number of types (richness) in each sample; start diversity(INMAT,h_shannon,e_shannon,d_simpson,e_simpson,t_f,total,n_types); nobs=nrow(inmat); nvars=ncol(inmat); sums=j(nobs,nvars,0); * dimension ; h_shannon=j(nobs,1,0); * dimension a vector for shannon ; total=inmat[,+]; * sample sizes; n_types=(inmat>0)[,+]; * number of types (species, alleles); do i=1 to nvars; sums[,i]=total; end; prop=inmat/sums; *proportions; f=(prop#prop)[,+]; *isonymy; d_simpson=1/f ; *effective number of alleles, Simpson's D; e_simpson = d_simpson#(1/n_types); * evenness for simpson's D; t_f= d_simpson-1; * estimate of theta_f (Neiman 1995) ; do i=1 to nobs; x= prop[i,loc(prop[i,])]; *error trap for division by 0; h_shannon[i,]=-(sum(x#log(x))); *shannon's H; end; e_shannon= h_shannon/log(total); *evenness for shannon's H; finish diversity; ************************************************************************; /* OK let's read the data and do the business */ use conkey1 ; *specify the dataset; read all into inmat ; *read in the numeric data; read all var {site }; *read in the character variable for labels; call diversity(INMAT,h_shannon,e_shannon,d_simpson,e_simpson,t_f,total,n_types); t_e=theta_vec(n_types,total); create div var { site h_shannon e_shannon d_simpson e_simpson t_f t_e total n_types }; append; close div ; proc plot; plot t_e* t_f ='*' $ site; proc print; run;