* --------------------------------------------------------------------------- ; * Electronic Appendix B; * SAS macro to calculate RD for generalized linear mixed models (GLMM) by Eq. 14 using proc GLIMMIX, proc MIXED, or proc GENMOD of SAS 9.4.; * Revised on July 2, 2021; * Kohji Yamamura (2016); * Estimation of the predictive ability of ecological models.; * Communications in Statistics - Simulation and Computation, 45:2122-2144; * DOI: 10.1080/03610918.2014.889161.; * ASSUMPTIONS; * The observations independently fluctuate around the expectation, following a normal, Poisson, or binomial distribution.; * The expectation of each observation fluctuates independently around the mean, following a normal distribution in a transformed scale.; * We can further assume that a group of mean may independently fluctuate.; * NOTE; * Due to the central limit theorem, the assumption of homoscedastic normal distribution involves the implicit assumption that the model is additive in the current scale. If we directly apply a normal distribution to observations, therefore, we cannot use any link function other than the identity function. We should appropriately transform the observation before performing the analysis when we use a normal distribution for errors.; * INSTRUCTION; * Please specify the most complex model as the true model.; * Please use the option "ShowModel=" to specify the number of models whose estimates of parameters should be shown. The default setting is 5 models. ; * Please use dist=negbin for a negative binomial model. A negative binomial model can be used as an approximation to the logarithmic Poisson GLMM.; * For binomial response, please use the aggregative usage such as 'model r/n =' instead of the usage such as 'model r(event="1") ='.; * Please specify OverDispersion=0, if we use pure Poisson or pure binomial GLM by ignoring the variability in expectation.; * Please specify OverDispersion=0 and scale=pscale if we use quasi-Poisson GLM (that corresponds to NB1) or quasi-binomial GLM.; * Please specify method=rspl if we use a model in which the default method (laplace) does not work.; * --- Definition of macro RDcompare --- ; %macro RDcompare(data=, class=, DepVar=, TrueModel=, random=, dist=normal, link=identity, offset=, subject=, type=, repeated=, OverDispersion=1, scale=, method=laplace, ShowModel=5); %if %bquote(&dist)=n or %bquote(&dist)=g or %bquote(&dist)=gaussian %then %let dist=normal; %if %bquote(&link)=id %then %let link=identity; %if %bquote(&dist)=normal and %bquote(&link)^=identity %then %do; %put; %put ** Error: The link function is not allowed for normal errors. **; %put; %goto exit; %end; %if %length(&random)*%length(&repeated)>0 %then %do; %put; %put ** Error: Random and repeated statements cannot be used simultaneously. **; %put; %goto exit; %end; %if %bquote(&dist)^=normal and %length(&repeated)>0 %then %do; %put; %put ** Error: Repeated statement cannot be used in GLMM. **; %put; %goto exit; %end; data factors; tmp = symget("TrueModel"); cnt=count(trim(tmp),' ')+1; * put cnt=; do i=1 to cnt; factor=scan(tmp,i,' '); output; keep factor; end; call symput("FactNum",left(put(cnt,8.))); call symput("ModelNum",left(put(2**cnt,8.))); run; %if %bquote(&dist)=negbin %then %let OverDispersion=0; %RDglimmix(data=&data, class=&class, DepVar=&DepVar, TrueModel=&TrueModel, Model=, random=&random, dist=&dist, link=&link, offset=&offset, subject=&subject, type=&type, repeated=&repeated, DisplayResults=0, method=&method, CCall=1, fst=0, OverDispersion=&OverDispersion, scale=&scale, SecondCall=0); data total; set RDcal; run; %do i=2 %to &ModelNum; data factorComb; set factors; if mod(floor((&i-0.5)/(2**(&FactNum-_n_))),2)=1 then output; run; data _null_; call symput("flag","1"); run; data factmp; set factorComb; retain facsum; if _n_ = 1 then facsum=factor; if _n_ > 1 then facsum=strip(facsum)|| " " || strip(factor); run; data factmp2; set factmp; facsum2=lag(facsum); run; data _null_; set factmp2; if count(factor,"*")>0 then do; numf=count(trim(factor),'*')+1; do i=1 to numf; tmp2=scan(factor,i,'*'); if index(strip(facsum2),compress(tmp2))=0 then call symput("flag","0"); end; end; if count(factor,"*")=2 then do; tmp3=scan(factor,1,'*')||"*"||scan(factor,2,'*'); if index(strip(facsum2),compress(tmp3))=0 then call symput("flag","0"); tmp4=scan(factor,2,'*')||"*"||scan(factor,3,'*'); if index(strip(facsum2),compress(tmp4))=0 then call symput("flag","0"); tmp5=scan(factor,1,'*')||"*"||scan(factor,3,'*'); if index(strip(facsum2),compress(tmp5))=0 then call symput("flag","0"); end; if count(factor,"*")=3 then do; tmp6=scan(factor,1,'*')||"*"||scan(factor,2,'*'); if index(strip(facsum2),compress(tmp6))=0 then call symput("flag","0"); tmp7=scan(factor,1,'*')||"*"||scan(factor,3,'*'); if index(strip(facsum2),compress(tmp7))=0 then call symput("flag","0"); tmp8=scan(factor,1,'*')||"*"||scan(factor,4,'*'); if index(strip(facsum2),compress(tmp8))=0 then call symput("flag","0"); tmp9=scan(factor,2,'*')||"*"||scan(factor,3,'*'); if index(strip(facsum2),compress(tmp9))=0 then call symput("flag","0"); tmp10=scan(factor,2,'*')||"*"||scan(factor,4,'*'); if index(strip(facsum2),compress(tmp10))=0 then call symput("flag","0"); tmp11=scan(factor,3,'*')||"*"||scan(factor,4,'*'); if index(strip(facsum2),compress(tmp11))=0 then call symput("flag","0"); tmp12=scan(factor,1,'*')||"*"||scan(factor,2,'*')||"*"||scan(factor,3,'*'); if index(strip(facsum2),compress(tmp12))=0 then call symput("flag","0"); tmp13=scan(factor,1,'*')||"*"||scan(factor,2,'*')||"*"||scan(factor,4,'*'); if index(strip(facsum2),compress(tmp13))=0 then call symput("flag","0"); tmp14=scan(factor,1,'*')||"*"||scan(factor,3,'*')||"*"||scan(factor,4,'*'); if index(strip(facsum2),compress(tmp14))=0 then call symput("flag","0"); tmp15=scan(factor,2,'*')||"*"||scan(factor,3,'*')||"*"||scan(factor,4,'*'); if index(strip(facsum2),compress(tmp15))=0 then call symput("flag","0"); end; run; %if &flag=1 %then %do; proc transpose data=factorComb out=factorCombT prefix=term_; var factor; run; data _null_; set factorCombT; AllTerm=catx(' ',of term_:); keep AllTerm; call symput('Model',AllTerm);; run; %RDglimmix(data=&data, class=&class, DepVar=&DepVar, TrueModel=&TrueModel, Model=&Model, random=&random, dist=&dist, link=&link, offset=&offset, subject=&subject, type=&type, repeated=&repeated, DisplayResults=0, method=&method, CCall=1, fst=0, OverDispersion=&OverDispersion, scale=&scale, SecondCall=1); proc append base = total data = RDcal; run; %end; * End of skip for non-hierarchical models; %end; proc sort data=total out=totalsort0; by descending RD; run; data totalsort; Rank = _n_; set totalsort0; run; ods select all; proc print data=totalsort noobs; run; data _null_; call symput("totaln2",trim(left(put(min(%eval(&ModelNum),%eval(&ShowModel)),8.)))); run; %if %eval(&ShowModel)>0 %then %do; %if %eval(&OverDispersion)>0 %then %do; title2 'Estimates of variances'; proc print data=pmat noobs; run; %end; %do j=1 %to %eval(&totaln2); data totalsort2; set totalsort; if _n_ = &j then %do; call symput('Model',trim(model)); %end; run; %if &j=1 %then %do; %RDglimmix(data=&data, class=&class, DepVar=&DepVar, TrueModel=&TrueModel, Model=&Model, random=&random, dist=&dist, link=&link, offset=&offset, subject=&subject, type=&type, repeated=&repeated, DisplayResults=1, method=&method, CCall=1, fst=1, OverDispersion=&OverDispersion, scale=&scale, SecondCall=1); %end; %if &j>1 %then %do; %RDglimmix(data=&data, class=&class, DepVar=&DepVar, TrueModel=&TrueModel, Model=&Model, random=&random, dist=&dist, link=&link, offset=&offset, subject=&subject, type=&type, repeated=&repeated, DisplayResults=1, method=&method, CCall=1, fst=0, OverDispersion=&OverDispersion, scale=&scale, SecondCall=1); %end; run; %end; run; %end; %exit: %mend RDcompare; * --- Definition of macro RDglimmix --- ; %macro RDglimmix(data=, class=, DepVar=, TrueModel=, Model=, random=, dist=normal, link=identity, offset=, subject=, type=, repeated=, DisplayResults=1, method=laplace, CCall=0, fst=1, OverDispersion=1, scale=, SecondCall=0); %if %bquote(&dist)=n or %bquote(&dist)=g or %bquote(&dist)=gaussian %then %let dist=normal; %if %bquote(&link)=id %then %let link=identity; %if %bquote(&dist)=normal and %bquote(&link)^=identity %then %do; %put; %put ** Error: The link function is not allowed for normal errors. **; %put; %goto exit; %end; %if %length(&random)*%length(&repeated)>0 %then %do; %put; %put ** Error: Random and repeated statements cannot be used simultaneously. **; %put; %goto exit; %end; %if %bquote(&dist)^=normal and %length(&repeated)>0 %then %do; %put; %put ** Error: Repeated statement cannot be used in GLMM. **; %put; %goto exit; %end; * Proc MIXED is used instead of Proc GLIMMIX for normal errors.; %if %bquote(&dist)=normal %then %do ; %RDmixed(data=&data,class=&class,DepVar=&DepVar,TrueModel=&TrueModel,Model=&Model,random=&random,subject=&subject,type=&type,repeated=&repeated,DisplayResults=&DisplayResults, CCall=&CCall, fst=&fst, SecondCall=&SecondCall); %goto exit; %end; * Proc GENMOD is used if the model contains no OverDispersion.; %if %eval(&OverDispersion)=0 %then %do ; %RDgenmod(data=&data,class=&class,DepVar=&DepVar,TrueModel=&TrueModel,Model=&Model,dist=&dist,link=&link,offset=&offset,type=&type,DisplayResults=&DisplayResults, CCall=&CCall, fst=&fst, OverDispersion=&OverDispersion, scale=&scale, SecondCall=&SecondCall); %goto exit; %end; data data2; set &data; _ObsNumber = _n_; %let ModelOption= / dist=&dist link=&link; %if %length(&offset)>0 %then %do; %let ModelOption= &ModelOption offset=&offset ; %end; %let RanOption= ; %if %length(&subject)>0 %then %do; %let RanOption= / subject=&subject ; %end; %if %length(&type)>0 %then %do; %let RanOption= &RanOption type=&type ; %end; %if %length(&subject)>0 and %length(&random)=0 %then %do; %let random=int; %end; title2 'Estimates of variances' ; %if %eval(&SecondCall)=0 %then %do; * Calculation for true model ; proc glimmix data=data2 method=&method; ods select CovParms; %if %eval(&fst)=0 %then %do; ods select none; %end; %if %eval(&OverDispersion)>0 %then %do; ods output CovParms=pmat; %end; class _ObsNumber &class; model &DepVar = &TrueModel &ModelOption; %if %length(&subject)>0 %then %do; random &random &RanOption; %end; %else %do; random &random _ObsNumber &RanOption; %end; run; Data _null_; set pmat nobs=cnt; call symput("PmatNum",left(put(cnt,8.))); run; %let PmatFix=1%pmatf(length=&PmatNum); /* %put Estimates of variance are given by &var; */ title2 'Calculation of components' ; * Calculation of l_max ; proc glimmix data=data2 method=laplace; ods select none; ods output FitStatistics=ParmLmax; class _ObsNumber &class; model &DepVar = _ObsNumber &ModelOption; %if %length(&subject)>0 %then %do; random &random &RanOption; %end; %else %do; random &random _ObsNumber &RanOption; %end; parms / pdata=pmat hold=&PmatFix noiter; run; * Calculation of l_sat ; proc glimmix data=data2 method=laplace; ods select none; ods output FitStatistics=ParmLsat; class _ObsNumber &class; model &DepVar = &TrueModel &ModelOption; %if %length(&subject)>0 %then %do; random &random &RanOption; %end; %else %do; random &random _ObsNumber &RanOption; %end; parms / pdata=pmat hold=&PmatFix noiter; run; * Calculation of l_null ; proc glimmix data=data2 method=laplace; ods select none; ods output FitStatistics=ParmLnull; class _ObsNumber &class; model &DepVar= &ModelOption; %if %length(&subject)>0 %then %do; random &random &RanOption; %end; %else %do; random &random _ObsNumber &RanOption; %end; parms / pdata=pmat hold=&PmatFix noiter; run; %end; *end of first call; * Calculation of l ; title2 'Estimates of parameters'; proc glimmix data=data2 method=laplace; ods select none; ods output FitStatistics=ParmL Tests3=Tests3 ParameterEstimates=ParaEst; class _ObsNumber &class; model &DepVar = &Model &ModelOption solution; %if %length(&subject)>0 %then %do; random &random &RanOption; %end; %else %do; random &random _ObsNumber &RanOption; %end; parms / pdata=pmat hold=&PmatFix noiter; run; %if %eval(&DisplayResults)>0 %then %do; data ParaEst2; set ParaEst; drop DF tValue Probt; run; ods select all; proc print data=ParaEst2 noobs; run; %end; proc means data=Tests3 noprint; var NumDF; output out=fixedDF sum=DF; run; %if %length(&Model)=0 %then %do; data fixedDF; DF=0; run; data ParmL; set ParmLnull; run; %end; title2 'Comparison of RD between models'; * Calculation of RD; data ParmLmax2; set ParmLmax; Lmax = -Value/2; keep Lmax; if _n_=1 then output; run; data ParmLsat2; set ParmLsat; Lsat = -Value/2; keep Lsat; if _n_=1 then output; run; data ParmLnull2; set ParmLnull; Lnull = -Value/2; keep Lnull; if _n_=1 then output; run; data ParmL2; set ParmL; L = -Value/2; keep L; if _n_=1 then output; run; data Result; set ParmLmax2; merge ParmLsat2 ParmLnull2 ParmL2 fixedDF; run; data RDcal; set Result; ModelDF = DF+1; RD = 1-(Lmax - L + ModelDF)/(Lmax - Lnull + 1); RSD = 1-(Lsat - L + ModelDF)/(Lsat - Lnull + 1); Model = symget("Model"); keep Model ModelDF RD RSD; run; %if %eval(&CCall)=0 %then %do; ods select all; proc print data=RDcal noobs; %end; run; %exit: %mend RDglimmix; * --- End of definition of macro RDglimmix --- ; * --- Definition of macro RDmixed --- ; %macro RDmixed(data=, class=, DepVar=, TrueModel=, Model=, random=, subject=, type=, repeated=, DisplayResults=1, CCall=0, fst=1, SecondCall=0); %if %length(&random)*%length(&repeated)>0 %then %do; %put; %put ** Error: Random and repeated statements cannot be used simultaneously. **; %put; %goto exit; %end; %let ModelOption= /; %let RanOption= /; %if %length(&subject)>0 %then %do; %let RanOption= &RanOption subject=&subject ; %end; %if %length(&type)>0 %then %do; %let RanOption= &RanOption type=&type ; %end; data data2; set &data; _ObsNumber = _n_; title2 'Estimates of variances'; %if %eval(&SecondCall)=0 %then %do; * Calculation for ture model ; proc mixed data=data2 method=REML; ods select CovParms; %if %Eval(&fst)=0 %then %do; ods select none; %end; ods output CovParms=pmat; class &class; model &DepVar = &TrueModel &ModelOption; %if %length(&random)>0 %then %do; random &random &RanOption g s; ods output g=gmat; %end; %if %length(&repeated)>0 %then %do; repeated &repeated &RanOption; %end; run; Data _null_; set pmat nobs=cnt; call symput("PmatNum",left(put(cnt,8.))); run; %let PmatFix=1%pmatf(length=&PmatNum); title2 'Calculation of components'; * Calculation of l_max ; proc mixed data=data2 method=ML; ods select none; ods output ParmSearch=ParmLmax; class _ObsNumber &class; model &DepVar = _ObsNumber; %if %length(&random)>0 %then %do; random &random &RanOption gdata=gmat;%end; %if %length(&repeated)>0 %then %do; repeated &repeated &RanOption; %end; parms /pdata=pmat hold=&PmatFix noiter noprofile; run; * Calculation of l_sat ; proc mixed data=data2 method=ML; ods select none; ods output ParmSearch=ParmLsat; class _ObsNumber &class; model &DepVar = &TrueModel; %if %length(&random)>0 %then %do; random &random &RanOption gdata=gmat;%end; %if %length(&repeated)>0 %then %do; repeated &repeated &RanOption; %end; parms /pdata=pmat hold=&PmatFix noiter noprofile; run; * Calculation of l_null ; proc mixed data=data2 method=ML; ods select none; ods output ParmSearch=ParmLnull; class _ObsNumber &class; model &DepVar = ; %if %length(&random)>0 %then %do; random &random &RanOption gdata=gmat;%end; %if %length(&repeated)>0 %then %do; repeated &repeated &RanOption; %end; parms /pdata=pmat hold=&PmatFix noiter noprofile; run; %end; *End of frist call; * Calculation of l ; title2 'Estimates of parameters'; %if %length(&Model)>0 %then %do; proc mixed data=data2 method=ML; ods select none; ods output ParmSearch=ParmL Tests3=Tests3 SolutionF=SolF; class _ObsNumber &class; model &DepVar = &Model &ModelOption solution; %if %length(&random)>0 %then %do; random &random &RanOption gdata=gmat;%end; %if %length(&repeated)>0 %then %do; repeated &repeated &RanOption; %end; parms /pdata=pmat hold=&PmatFix noiter noprofile; run; %if %eval(&DisplayResults)>0 %then %do; data SolF2; set SolF; drop DF tValue Probt; run; ods select all; proc print data=SolF2 noobs; run; %end; proc means data=Tests3 noprint; var NumDF; output out=fixedDF sum=DF; run; %end; %if %length(&Model)=0 %then %do; data fixedDF; DF=0; run; data ParmL; set ParmLnull; run; proc glm data=data2; ods select none; ods output ParameterEstimates=ParaEst; class _ObsNumber &class; model &DepVar = /solution; run; %if %Eval(&DisplayResults)>0 %then %do; data ParaEst2; set ParaEst; drop Dependent tValue Probt; run; ods select all; proc print data=ParaEst2 noobs; run; %end; %end; title2 'Comparison of RD between models'; * Calculation of RD; data ParmLmax2; set ParmLmax end=last; Lmax = LogLike; keep Lmax; if last then output; run; data ParmLsat2; set ParmLsat end=last; Lsat = LogLike; keep Lsat; if last then output; run; data ParmLnull2; set ParmLnull end=last; Lnull = LogLike; keep Lnull; if last then output; run; data ParmL2; set ParmL end=last; L = LogLike; keep L; if last then output; run; data Result; set ParmLmax2; merge ParmLsat2 ParmLnull2 ParmL2 fixedDF; run; data RDcal; set Result; ModelDF = DF+1; RD = 1-(Lmax - L + ModelDF)/(Lmax - Lnull + 1); RSD = 1-(Lsat - L + ModelDF)/(Lsat - Lnull + 1); Model = symget("Model"); keep Model ModelDF RD RSD; run; %if %eval(&CCall)=0 %then %do; ods select all; proc print data=RDcal noobs; %end; run; %exit: %mend RDmixed; * --- End of definition of macro RDmixed --- ; * --- Definition of macro RDgenmod --- ; %macro RDgenmod(data=, class=, DepVar=, TrueModel=, Model=, dist=, link=, type=,offset=, DisplayResults=1, CCall=0, fst=1, OverDispersion=1, scale=, SecondCall=0); %let ModelOption= /; data data2; set &data; _ObsNumber = _n_; %let ModelOption= / dist=&dist link=&link; %if %length(&offset)>0 %then %do; %let ModelOption= &ModelOption offset=&offset ; %end; %let ModelOption1= &ModelOption; %if %eval(&SecondCall)=0 %then %do; * Calculation for ture model ; proc genmod data=data2; %if %eval(&fst)=0 %then %do; ods select none; %end; %if %length(&scale)>0 %then %do; %let ModelOption1= &ModelOption1 &scale; %end; ods output ParameterEstimates=ParaEst1; class _ObsNumber &class; model &DepVar = &TrueModel &ModelOption1; run; data _null_; set ParaEst1 end=eof; if eof then call symput("SqPhi",left(put(Estimate,20.4))); if Parameter='Dispersion' then call symput("NBK",left(put(Estimate,20.4))); run; %if %length(&scale)>0 %then %do; %let ModelOption1= &ModelOption noscale scale=&SqPhi; %end; %else %if %bquote(&dist)=negbin %then %do; %let ModelOption1= &ModelOption noscale scale=&NBK; %end; %else %do; %let ModelOption1= &ModelOption; %end; * Calculation of components ; * Calculation of l_max ; proc genmod data=data2; ods select none; ods output Modelfit=ParmLmax; class _ObsNumber &class; model &DepVar = _ObsNumber &ModelOption1; run; * Calculation of l_sat ; proc genmod data=data2; ods select none; ods output Modelfit=ParmLsat; class _ObsNumber &class; model &DepVar = &TrueModel &ModelOption1; run; * Calculation of l_null ; proc genmod data=data2; ods select none; ods output Modelfit=ParmLnull; class _ObsNumber &class; model &DepVar= &ModelOption1; run; %end; *end of first call; data PhiEst; set ParaEst1 end=eof; Phi=Estimate*Estimate; if eof then call symput("SqPhi",left(put(Estimate,20.4))); if eof then output; if Parameter='Dispersion' then call symput("NBK",left(put(Estimate,20.4))); if Parameter='Dispersion' then output; keep Phi; run; %if %length(&scale)>0 %then %do; %let ModelOption1= &ModelOption noscale scale=&SqPhi; %end; %else %if %bquote(&dist)=negbin %then %do; %let ModelOption1= &ModelOption noscale scale=&NBK; %end; %else %do;%let ModelOption1= &ModelOption; %end; * Calculation of l ; title2 'Estimates of parameters'; proc genmod data=data2; ods select none; ods output Modelfit=ParmL ModelANOVA=Tests3 ParameterEstimates=ParaEst; class _ObsNumber &class; model &DepVar = &Model &ModelOption1 Type3; run; %if %eval(&DisplayResults)>0 %then %do; data ParaEst2; set ParaEst; keep Parameter Estimate StdErr; run; ods select all; proc print data=ParaEst2 noobs; run; %end; proc means data=ParaEst noprint; var DF; output out=fixedDF sum=DF; run; %if %length(&Model)=0 %then %do; data fixedDF; DF=1; run; * DF=1 only for GENMOD, because DF of intercept is counted only in GENMOD.; data ParmL; set ParmLnull; run; %end; title2 'Comparison of RD between models'; * Calculation of RD; data ParmLmax2; set ParmLmax; Lmax = Value; keep Lmax; if _n_=6 then output; run; data ParmLsat2; set ParmLsat; Lsat = Value; keep Lsat; if _n_=6 then output; run; data ParmLnull2; set ParmLnull; Lnull = Value; keep Lnull; if _n_=6 then output; run; data ParmL2; set ParmL; L = Value; keep L; if _n_=6 then output; run; data Result; set ParmLmax2; merge ParmLsat2 ParmLnull2 ParmL2 fixedDF; run; %if %length(&scale)>0 %then %do; data RDcal; set Result; merge PhiEst; ModelDF = DF; RD = 1-(Lmax - L + ModelDF*Phi)/(Lmax - Lnull + Phi); RSD = 1-(Lsat - L + ModelDF*Phi)/(Lsat - Lnull + Phi); Model = symget("Model"); keep Model ModelDF RD RSD; run; %end; %else %do; data RDcal; set Result; ModelDF = DF; RD = 1-(Lmax - L + ModelDF)/(Lmax - Lnull + 1); RSD = 1-(Lsat - L + ModelDF)/(Lsat - Lnull + 1); Model = symget("Model"); keep Model ModelDF RD RSD; run; %end; %if %eval(&CCall)=0 %then %do; ods select all; proc print data=RDcal noobs; %end; run; %exit: %mend RDgenmod; * --- End of definition of macro RDgenmod --- ; * --- required macros ---; %global PmatNum; %global PmatFix; %macro pmatf(length=); %do n=2 %to &length; ,&n %end; %mend pmatf; * --- End of all macro ---;