# ---------------------------------------------------------------------------- # RDcompare # # ---------------------------------------------------------------------------- # Revised on Dec 1, 2024 # EXPLANATION # This function calculates RD for generalized linear models. # All combinations of explanatory variables are compared if the combinations belong to the hierarchical family of models. # The models are listed in the descending order of RD. # Original reference for RD: # Kohji Yamamura (2016) # Estimation of the predictive ability of ecological models # Communications in Statistics - Simulation and Computation, 45:2122-2144 # DOI: 10.1080/03610918.2014.889161 # INPUT PARAMETERS # Please specify the saturated model or the most complex model as the formula. # OPTIONS # max.ranking: Number of models shown in the ranking in the output. The default number of 50. # ShowModel: Number of models of which the parameters are shown in the output. The default number is 5. # Compulsory: The factors that should be included compulsorily. If we specify Compulsory="Trap", for example, the factor "Trap" is always included in the model. Notice that the compulsory factors should also be included in the model formula. # noint: We should use noint=TRUE instead of using the form "-1 +", if we want to omit the intercept. The default setting is noint=FALSE. # phi: Dispersion parameter. We should use this option if we already have a reliable estimate of dispersion parameter. # Progress: If Progress=TRUE, the progress of calculation is shown on the console window. Default setting is FALSE. # --- Start of RDcompare function --- RDcompare <- function(formula, family, data=NULL, weights, subset, phi, ShowModel=5, max.ranking=50, noint=FALSE, na.action, start = NULL, etastart, mustart, offset, control = list(...), method = "glm.fit", singular.ok = TRUE, contrasts = NULL, Compulsory=NULL, Progress=FALSE){ Ranking <- max.ranking ShowModel <- ceiling(ShowModel) log <- FALSE mff <- terms.formula(formula,data=data) org.model <- gsub("[[:blank:]]","",deparse(formula,width.cutoff=500)) depvar <- strsplit(org.model,"~")[[1]][1] fact.name <- attr(mff,"term.labels") fact.number.full <- length(fact.name) if(!is.null(Compulsory)){ fact.name <- fact.name[-which(fact.name %in% unlist(strsplit(Compulsory,"\\+")))] Msg <- "* Compulsory terms are not specified in a correct form.\n * All compulsory terms should be included also in the model.\n * Name of a compulsory interaction should be arranged with the same sequence with the main effect.\n * For example, if we used \"Month*Trap\" or \"Month+Trap+Trap:Month\" as the model formula,\n * we should use \"Month:Trap\" but should not use \"Trap:Month\" as the interaction term." if(length(unlist(strsplit(Compulsory,"\\+"))) > fact.number.full-length(fact.name) | length(fact.name)==0){stop(Msg)} } fact.number <- length(fact.name) model.number <- 2^fact.number result <- 0 mf <- match.call() deparse.mf <- deparse(mf,width.cutoff=500) deparse.mf2 <- strsplit(deparse.mf,"~")[[1]][2] mf.arg <- unlist(strsplit(gsub(" +","",deparse.mf2),","), recursive = TRUE) mf.arg.n <- length(mf.arg) mf.arg[mf.arg.n] <- substring(mf.arg[mf.arg.n],1,nchar(mf.arg[mf.arg.n])-1) mf.arg.others <- "" for(k in 2:mf.arg.n){ if(charmatch("phi",mf.arg[k],nomatch = 0)>0) next if(charmatch("ShowModel",mf.arg[k],nomatch = 0)>0) next if(charmatch("max.ranking",mf.arg[k],nomatch = 0)>0) next if(charmatch("noint",mf.arg[k],nomatch = 0)>0) next if(charmatch("Compulsory",mf.arg[k],nomatch = 0)>0) next if(charmatch("Progress",mf.arg[k],nomatch = 0)>0) next mf.arg.others <- paste(mf.arg.others,mf.arg[k],sep=",") } mf.arg.others <- paste(mf.arg.others,")",sep="") # Calculation for true model. indepvar <- paste(fact.name,collapse="+") if(!is.null(Compulsory)){ if(indepvar==""){org.model <- paste(depvar,"~",Compulsory,sep="")} else {org.model <- paste(depvar,"~",Compulsory,"+",indepvar,sep="")} }else{ org.model <- paste(depvar,"~",indepvar,sep="")} mf.char <- paste("glm(",org.model,sep="") if (noint==TRUE) mf.char <- paste(mf.char,"-1 ",sep="") mf.char <- paste(mf.char,mf.arg.others,sep="") if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")} true.model <- eval(parse(text=mf.char)) # Calculation for null model. model <- paste(depvar," ~ 1 ",sep="") mf.char <- paste("glm(",model,sep="") mf.char <- paste(mf.char,mf.arg.others,sep="") if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")} null.model <- eval(parse(text=mf.char)) # Creating all combinations of fact.name RD <- c() RSD <- c() k <- c() Model <- c() if(Progress==TRUE){ cat("0---20---40---60---80---100\n"); proglen1 <- 0 } for(i in 1:model.number){ if(Progress==TRUE){ proglen2 <- floor(100*i/model.number/4) if(proglen2 > proglen1){ for(progleni in 1:(proglen2-proglen1)){cat("*"); flush.console()} proglen1 <- proglen2} } if (log==TRUE) cat("model.number=",i,"\n") flag <- 1 if (noint==FALSE) model <- paste(depvar," ~ 1 ",sep="") else model <- paste(depvar," ~ -1 ",sep="") if(!is.null(Compulsory)){ model <- paste(model,Compulsory,sep=" + ") model <- paste(gsub("\\+"," \\+ ",model)," ",sep="") } for(j in 1:fact.number){ if (((i-0.5)%/%(2^(fact.number-j)))%%2==1) { # Splitting the interaction into the elements split <- strsplit(fact.name[j],":") splitn <- length(split[[1]]) # Generating all partial combinations of elements splitm <- 2^splitn for(m in 1:splitm){ inter <- "" for(n in 1:splitn){ if (((m-0.5)%/%(2^(splitn-n)))%%2==1) inter <- ifelse(inter=="",inter <- split[[1]][n],paste(inter,":",split[[1]][n],sep=""))} # The flag is kept at 1 only if all partial combinations in the interaction are found in the previous part of the model. if ((inter!=fact.name[j]) && (inter!="")) { flag <- flag*ifelse(length(grep(paste(" ",inter," ",sep=""),model))>0,1,0) if (log==TRUE) cat("flag=",flag,", try adding",fact.name[j],", cheking subfactor",inter,"\n") } } model <- paste(model,"+ ",fact.name[j]," ",sep="") } } if (strsplit(model,"~")[[1]][2]==" -1 ") model <-paste(depvar," ~ 1 ",sep="") if (log==TRUE) cat("model: ",model,"\n\n") # Calculation is performed for the candidate model if the flag remains 1 if (flag==1) { # Calculation for test model. mf.char <- paste("glm(",model,sep="") mf.char <- paste(mf.char,mf.arg.others,sep="") if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")} test.model <- eval(parse(text=mf.char)) # Calculating RD and RSD RDcal <- RDcompare.cal(test.model,true.model,null.model,phi) RD <- c(RD,RDcal[1]) RSD <- c(RSD,RDcal[2]) k <- c(k,RDcal[3]) Model <- c(Model,model) } } if(Progress==TRUE) cat("\n") result <- data.frame(RD,RSD,k,Model) result <- result[order(result$RD,decreasing=TRUE),] rownames(result) <- c() cat("\n# RD ranking for the hierarchical family of models #\n") print(head(result,Ranking)) output <- list(Ranking=result) # Parameters of the best model. family <- true.model$family$family df <- true.model$df.residual if(missing(phi)==0) phi <- phi else phi <- if((family=="binomial")||(family=="poisson")) 1 else sum((true.model$weights*true.model$residuals^2)/df) if(ShowModel>0){ cat("\n# phi = ",phi," #\n"); cat("\n# Parameters of the best",ShowModel,"model #\n"); for(m in 1:min(ShowModel,length(RD))){ mf.char <- paste("glm(",result[m,4],sep="") mf.char <- paste(mf.char,mf.arg.others,sep="") if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")} best.model <- summary.glm(eval(parse(text=mf.char)),dispersion=phi) cat("# Model",m,"#\n") print(coefficients(best.model)[,c(1,2)]) tmp <- list(coefficients(best.model)[,c(1,2)]) names(tmp) <- paste("Model",m,sep="_") output <- c(output,tmp) } } final <- output } # --- End of RDcompare function --- # ---------------------------------------------------------------------------- # RDcompareNB # # ---------------------------------------------------------------------------- RDcompareNB <- function(formula, data=NULL, weights, subset, ShowModel=5, max.ranking=50, noint=FALSE, na.action, start = NULL, etastart, mustart,offset, control = list(...), method = "glm.fit", contrasts = NULL, Compulsory=NULL){ library(MASS) Ranking <- max.ranking ShowModel <- ceiling(ShowModel) log <- FALSE mff <- terms.formula(formula,data=data) org.model <- gsub("[[:blank:]]","",deparse(formula,width.cutoff=500)) depvar <- strsplit(org.model,"~")[[1]][1] fact.name <- attr(mff,"term.labels") fact.number.full <- length(fact.name) if(!is.null(Compulsory)){ fact.name <- fact.name[-which(fact.name %in% unlist(strsplit(Compulsory,"\\+")))] Msg <- "* Compulsory terms are not specified in a correct form.\n * All compulsory terms should be included also in the model.\n * Name of a compulsory interaction should be arranged with the same sequence with the main effect.\n * For example, if we used \"Month*Trap\" or \"Month+Trap+Trap:Month\" as the model formula,\n * we should use \"Month:Trap\" but should not use \"Trap:Month\" as the interaction term." if(length(unlist(strsplit(Compulsory,"\\+"))) > fact.number.full-length(fact.name) | length(fact.name)==0){stop(Msg)} } fact.number <- length(fact.name) model.number <- 2^fact.number result <- 0 mf <- match.call() deparse.mf <- deparse(mf,width.cutoff=500) deparse.mf2 <- strsplit(deparse.mf,"~")[[1]][2] mf.arg <- unlist(strsplit(gsub(" +","",deparse.mf2),","), recursive = TRUE) mf.arg.n <- length(mf.arg) mf.arg[mf.arg.n] <- substring(mf.arg[mf.arg.n],1,nchar(mf.arg[mf.arg.n])-1) mf.arg.others <- "" for(k in 2:mf.arg.n){ if(charmatch("phi",mf.arg[k],nomatch = 0)>0) next if(charmatch("ShowModel",mf.arg[k],nomatch = 0)>0) next if(charmatch("max.ranking",mf.arg[k],nomatch = 0)>0) next if(charmatch("noint",mf.arg[k],nomatch = 0)>0) next if(charmatch("Compulsory",mf.arg[k],nomatch = 0)>0) next mf.arg.others <- paste(mf.arg.others,mf.arg[k],sep=",") } mf.arg.others <- paste(mf.arg.others,")",sep="") # Calculation for true model. indepvar <- strsplit(org.model,"~")[[1]][2] if(!is.null(Compulsory)){indepvar <- paste(Compulsory,indepvar,sep="+")} org.modelNB <- paste(depvar,"~",indepvar,sep="") mf.char <- paste("glm.nb(",org.modelNB,sep="") if (noint==TRUE) mf.char <- paste(mf.char,"-1 ",sep="") mf.char <- paste(mf.char,mf.arg.others,sep="") if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")} true.model <- eval(parse(text=mf.char)) theta <- true.model$theta cat("# Negative binomial model: theta=",theta,"#\n") mf.char <- paste("RDcompare(",org.model,sep="") fam <- paste("family=negative.binomial(theta=",deparse(theta),")",sep="") mf.char <- paste(mf.char,fam,sep=",") mf.char <- paste(mf.char,",phi=1",sep="") for(k in 2:mf.arg.n){ mf.char <- paste(mf.char,mf.arg[k],sep=",") } if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")} eval(parse(text=mf.char)) } # --- End of RDcompareNB function --- # ---------------------------------------------------------------------------- # RDcompare.cal # # ---------------------------------------------------------------------------- # The following function calculates RD for a specified model. RDcompare.cal <- function(test.model,true.model,null.model,phi){ family <- true.model$family$family df <- true.model$df.residual deviance <- test.model$deviance t.deviance <- true.model$deviance if(missing(null.model)) n.deviance <- true.model$null.deviance else n.deviance <- null.model$deviance k <- ncol(model.matrix(test.model)) if(missing(phi)==0) phi <- phi else phi <- if((family=="binomial")||(family=="poisson")) 1 else sum((true.model$weights*true.model$residuals^2)/df) RD <- 1-(deviance+2*phi*k)/(n.deviance+2*phi) RSD <- 1-(deviance-t.deviance+2*phi*k)/(n.deviance-t.deviance+2*phi) c(RD=RD,RSD=RSD,k=k) } # --- End of RDcompare.cal function ---