# ----------------------------------------------------------------------------
# RDcompare #
# ----------------------------------------------------------------------------

# Revised on Dec 1, 2024

# EXPLANATION

# This function calculates RD for generalized linear models. 
# All combinations of explanatory variables are compared if the combinations belong to the hierarchical family of models.
# The models are listed in the descending order of RD.

# Original reference for RD: 
# Kohji Yamamura (2016)
# Estimation of the predictive ability of ecological models
# Communications in Statistics - Simulation and Computation, 45:2122-2144
# DOI: 10.1080/03610918.2014.889161

# INPUT PARAMETERS

# Please specify the saturated model or the most complex model as the formula.

# OPTIONS

# max.ranking: Number of models shown in the ranking in the output. The default number of 50.

# ShowModel: Number of models of which the parameters are shown in the output. The default number is 5.

# Compulsory: The factors that should be included compulsorily. If we specify Compulsory="Trap", for example, the factor "Trap" is always included in the model. Notice that the compulsory factors should also be included in the model formula.

# noint: We should use noint=TRUE instead of using the form "-1 +", if we want to omit the intercept. The default setting is noint=FALSE.

# phi: Dispersion parameter. We should use this option if we already have a reliable estimate of dispersion parameter.

# Progress: If Progress=TRUE, the progress of calculation is shown on the console window. Default setting is FALSE. 

# --- Start of RDcompare function ---

RDcompare <- function(formula, family, data=NULL, weights, subset, phi, ShowModel=5,
    max.ranking=50, noint=FALSE, na.action, start = NULL, etastart, mustart,
    offset, control = list(...), method = "glm.fit", singular.ok = TRUE,
    contrasts = NULL, Compulsory=NULL, Progress=FALSE){

Ranking <- max.ranking
ShowModel <- ceiling(ShowModel)
log <- FALSE
mff <- terms.formula(formula,data=data)
org.model <- gsub("[[:blank:]]","",deparse(formula,width.cutoff=500))
depvar <- strsplit(org.model,"~")[[1]][1]
fact.name <- attr(mff,"term.labels")
fact.number.full <- length(fact.name)
if(!is.null(Compulsory)){
fact.name <- fact.name[-which(fact.name %in% unlist(strsplit(Compulsory,"\\+")))]
Msg <- "* Compulsory terms are not specified in a correct form.\n  * All compulsory terms should be included also in the model.\n  * Name of a compulsory interaction should be arranged with the same sequence with the main effect.\n  * For example, if we used \"Month*Trap\" or \"Month+Trap+Trap:Month\" as the model formula,\n  * we should use \"Month:Trap\" but should not use \"Trap:Month\" as the interaction term."
if(length(unlist(strsplit(Compulsory,"\\+"))) > fact.number.full-length(fact.name) | length(fact.name)==0){stop(Msg)}
}
fact.number <- length(fact.name)
model.number <- 2^fact.number
result <- 0

mf <- match.call()
deparse.mf <- deparse(mf,width.cutoff=500)
deparse.mf2 <- strsplit(deparse.mf,"~")[[1]][2]
mf.arg <- unlist(strsplit(gsub(" +","",deparse.mf2),","), recursive = TRUE)
mf.arg.n <- length(mf.arg)
mf.arg[mf.arg.n] <- substring(mf.arg[mf.arg.n],1,nchar(mf.arg[mf.arg.n])-1)

mf.arg.others <- ""
for(k in 2:mf.arg.n){
if(charmatch("phi",mf.arg[k],nomatch = 0)>0) next
if(charmatch("ShowModel",mf.arg[k],nomatch = 0)>0) next
if(charmatch("max.ranking",mf.arg[k],nomatch = 0)>0) next
if(charmatch("noint",mf.arg[k],nomatch = 0)>0) next
if(charmatch("Compulsory",mf.arg[k],nomatch = 0)>0) next
if(charmatch("Progress",mf.arg[k],nomatch = 0)>0) next
mf.arg.others <- paste(mf.arg.others,mf.arg[k],sep=",")
}
mf.arg.others <- paste(mf.arg.others,")",sep="")

# Calculation for true model.
indepvar <- paste(fact.name,collapse="+")
if(!is.null(Compulsory)){
if(indepvar==""){org.model <- paste(depvar,"~",Compulsory,sep="")} else {org.model <- paste(depvar,"~",Compulsory,"+",indepvar,sep="")}
}else{
org.model <- paste(depvar,"~",indepvar,sep="")}
mf.char <- paste("glm(",org.model,sep="")
if (noint==TRUE) mf.char <- paste(mf.char,"-1 ",sep="")
mf.char <- paste(mf.char,mf.arg.others,sep="")
if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")}
true.model <- eval(parse(text=mf.char))

# Calculation for null model.
model <- paste(depvar," ~ 1 ",sep="")
mf.char <- paste("glm(",model,sep="")
mf.char <- paste(mf.char,mf.arg.others,sep="")
if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")}
null.model <- eval(parse(text=mf.char))

# Creating all combinations of fact.name

RD <- c()
RSD <- c()
k <- c()
Model <- c()

if(Progress==TRUE){
cat("0---20---40---60---80---100\n"); proglen1 <- 0
}
for(i in 1:model.number){
if(Progress==TRUE){
proglen2 <- floor(100*i/model.number/4)
if(proglen2 > proglen1){
for(progleni in 1:(proglen2-proglen1)){cat("*"); flush.console()}
proglen1 <- proglen2}
}
if (log==TRUE) cat("model.number=",i,"\n")
flag <- 1
if (noint==FALSE) model <- paste(depvar," ~ 1 ",sep="")
else model <- paste(depvar," ~ -1 ",sep="")

if(!is.null(Compulsory)){
model <- paste(model,Compulsory,sep=" + ")
model <- paste(gsub("\\+"," \\+ ",model)," ",sep="")
}

for(j in 1:fact.number){
if (((i-0.5)%/%(2^(fact.number-j)))%%2==1) {

# Splitting the interaction into the elements
split <- strsplit(fact.name[j],":")
splitn <- length(split[[1]])

# Generating all partial combinations of elements
splitm <- 2^splitn
for(m in 1:splitm){
inter <- ""
for(n in 1:splitn){
if (((m-0.5)%/%(2^(splitn-n)))%%2==1) inter <- ifelse(inter=="",inter <- split[[1]][n],paste(inter,":",split[[1]][n],sep=""))}

# The flag is kept at 1 only if all partial combinations in the interaction are found in the previous part of the model. 
if ((inter!=fact.name[j]) && (inter!="")) {
flag <- flag*ifelse(length(grep(paste(" ",inter," ",sep=""),model))>0,1,0)
if (log==TRUE) cat("flag=",flag,", try adding",fact.name[j],", cheking subfactor",inter,"\n")
}
}
model <- paste(model,"+ ",fact.name[j]," ",sep="")
}
}
if (strsplit(model,"~")[[1]][2]==" -1 ") model <-paste(depvar," ~ 1 ",sep="")
if (log==TRUE) cat("model: ",model,"\n\n")

# Calculation is performed for the candidate model if the flag remains 1
if (flag==1) {

# Calculation for test model.
mf.char <- paste("glm(",model,sep="")
mf.char <- paste(mf.char,mf.arg.others,sep="")
if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")}
test.model <- eval(parse(text=mf.char))

# Calculating RD and RSD
RDcal <- RDcompare.cal(test.model,true.model,null.model,phi)

RD <- c(RD,RDcal[1])
RSD <- c(RSD,RDcal[2])
k <- c(k,RDcal[3])
Model <- c(Model,model)
}
}
if(Progress==TRUE) cat("\n")
result <- data.frame(RD,RSD,k,Model)
result <- result[order(result$RD,decreasing=TRUE),]
rownames(result) <- c()

cat("\n# RD ranking for the hierarchical family of models #\n")
print(head(result,Ranking))
output <- list(Ranking=result)

# Parameters of the best model.
family <- true.model$family$family
df <- true.model$df.residual
if(missing(phi)==0) phi <- phi
else 
phi <- if((family=="binomial")||(family=="poisson"))
   1
else
   sum((true.model$weights*true.model$residuals^2)/df)
if(ShowModel>0){
cat("\n# phi = ",phi," #\n");
cat("\n# Parameters of the best",ShowModel,"model #\n");
for(m in 1:min(ShowModel,length(RD))){
mf.char <- paste("glm(",result[m,4],sep="")
mf.char <- paste(mf.char,mf.arg.others,sep="")
if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")}
best.model <- summary.glm(eval(parse(text=mf.char)),dispersion=phi)
cat("# Model",m,"#\n")
print(coefficients(best.model)[,c(1,2)])
tmp <- list(coefficients(best.model)[,c(1,2)])
names(tmp) <- paste("Model",m,sep="_")
output <- c(output,tmp)
}
}
final <- output
}
# --- End of RDcompare function ---

# ----------------------------------------------------------------------------
# RDcompareNB #
# ----------------------------------------------------------------------------

RDcompareNB <- function(formula, data=NULL, weights, subset, 
    ShowModel=5, max.ranking=50, noint=FALSE, na.action, start = NULL,
    etastart, mustart,offset, control = list(...),
    method = "glm.fit", contrasts = NULL, Compulsory=NULL){

library(MASS)

Ranking <- max.ranking
ShowModel <- ceiling(ShowModel)
log <- FALSE
mff <- terms.formula(formula,data=data)
org.model <- gsub("[[:blank:]]","",deparse(formula,width.cutoff=500))
depvar <- strsplit(org.model,"~")[[1]][1]
fact.name <- attr(mff,"term.labels")
fact.number.full <- length(fact.name)
if(!is.null(Compulsory)){
fact.name <- fact.name[-which(fact.name %in% unlist(strsplit(Compulsory,"\\+")))]
Msg <- "* Compulsory terms are not specified in a correct form.\n  * All compulsory terms should be included also in the model.\n  * Name of a compulsory interaction should be arranged with the same sequence with the main effect.\n  * For example, if we used \"Month*Trap\" or \"Month+Trap+Trap:Month\" as the model formula,\n  * we should use \"Month:Trap\" but should not use \"Trap:Month\" as the interaction term."
if(length(unlist(strsplit(Compulsory,"\\+"))) > fact.number.full-length(fact.name) | length(fact.name)==0){stop(Msg)}
}
fact.number <- length(fact.name)
model.number <- 2^fact.number
result <- 0

mf <- match.call()
deparse.mf <- deparse(mf,width.cutoff=500)
deparse.mf2 <- strsplit(deparse.mf,"~")[[1]][2]
mf.arg <- unlist(strsplit(gsub(" +","",deparse.mf2),","), recursive = TRUE)
mf.arg.n <- length(mf.arg)
mf.arg[mf.arg.n] <- substring(mf.arg[mf.arg.n],1,nchar(mf.arg[mf.arg.n])-1)

mf.arg.others <- ""
for(k in 2:mf.arg.n){
if(charmatch("phi",mf.arg[k],nomatch = 0)>0) next
if(charmatch("ShowModel",mf.arg[k],nomatch = 0)>0) next
if(charmatch("max.ranking",mf.arg[k],nomatch = 0)>0) next
if(charmatch("noint",mf.arg[k],nomatch = 0)>0) next
if(charmatch("Compulsory",mf.arg[k],nomatch = 0)>0) next
mf.arg.others <- paste(mf.arg.others,mf.arg[k],sep=",")
}
mf.arg.others <- paste(mf.arg.others,")",sep="")

# Calculation for true model.
indepvar <- strsplit(org.model,"~")[[1]][2]
if(!is.null(Compulsory)){indepvar <- paste(Compulsory,indepvar,sep="+")}
org.modelNB <- paste(depvar,"~",indepvar,sep="")
mf.char <- paste("glm.nb(",org.modelNB,sep="")
if (noint==TRUE) mf.char <- paste(mf.char,"-1 ",sep="")
mf.char <- paste(mf.char,mf.arg.others,sep="")
if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")}
true.model <- eval(parse(text=mf.char))
theta <- true.model$theta
cat("# Negative binomial model: theta=",theta,"#\n")

mf.char <- paste("RDcompare(",org.model,sep="")
fam <- paste("family=negative.binomial(theta=",deparse(theta),")",sep="")
mf.char <- paste(mf.char,fam,sep=",")
mf.char <- paste(mf.char,",phi=1",sep="")
for(k in 2:mf.arg.n){
mf.char <- paste(mf.char,mf.arg[k],sep=",")
}
if(nchar(gsub("[^\\(]","", mf.char)) > nchar(gsub("[^\\)]","", mf.char))) {mf.char <- paste(mf.char,")",sep="")}
eval(parse(text=mf.char))
}
# --- End of RDcompareNB function ---

# ----------------------------------------------------------------------------
# RDcompare.cal #
# ----------------------------------------------------------------------------

# The following function calculates RD for a specified model.

RDcompare.cal <- function(test.model,true.model,null.model,phi){
family <- true.model$family$family
df <- true.model$df.residual
deviance <- test.model$deviance
t.deviance <- true.model$deviance
if(missing(null.model)) n.deviance <- true.model$null.deviance
else n.deviance <- null.model$deviance
k <- ncol(model.matrix(test.model))
if(missing(phi)==0) phi <- phi
else 
phi <- if((family=="binomial")||(family=="poisson"))
   1
else sum((true.model$weights*true.model$residuals^2)/df)
RD <- 1-(deviance+2*phi*k)/(n.deviance+2*phi)
RSD <- 1-(deviance-t.deviance+2*phi*k)/(n.deviance-t.deviance+2*phi)
c(RD=RD,RSD=RSD,k=k)
} 
# --- End of RDcompare.cal function ---