编辑代码

## 定义一些新的函数
##
## R^2_pred: R^2 based PRESS 
# PRESS: 计算模型的预测残差平方和 (Prediction Error Sum of Squares)
PRESS = function(g) sum(rstandard(g, type="pred")^2)
# R2pred: 计算模型的预测R^2 (R-squared based on PRESS)
R2pred = function(g) {y=g$model[,1]; 1- PRESS(g)/sum((y-mean(y))^2) }
# compare different models
# cmp.g: 比较不同的模型
# 使用测试数据集 (dat.test) 进行预测,并返回模型的相关性能指标
cmp.g = function(g) {
  pred = predict(g, dat.test)  # 使用模型预测测试数据
  Rfit = cor(fitted(g), g$model[,1])  # 拟合值与真实值的相关系数 (训练数据)
  Rtest = cor(pred, dat.test$y)  # 预测值与真实值的相关系数 (测试数据)
  Rmix = cor(c(fitted(g), pred), c(g$model[,1], dat.test$y))  # 拟合值+预测值的相关系数
  gs = summary(g)  # 获取模型摘要信息
  c(g$rank, g$df, gs$r.squared, gs$adj.r.squared, R2pred(g), gs$sigma, AIC(g), BIC(g), Rmix, Rtest)
}
  
permutations.R=function(n,v=1:n)
{ # permutations.R: 生成全排列
    if (n <= 0) NULL
    else if (n == 1) matrix(v[1:n], 1, n)
    else if (n == 2) matrix(c(v[1:n], v[n:1]), 2, n)
    else {
    	xi=NULL
    	for(i in 1:n) xi= rbind(xi, cbind(v[i], Recall(n-1,v[-i])))
    	xi
    	}
}

## baseline and zero-sum contrasts
# perm.base1: 为给定排列 (perm) 构造一个基线对比矩阵 (baseline contrast)
perm.base1=function(perm, base=n, debug=F, ...)
{ # construct a baseline contrast for a permutation, perm
	n=length(perm)
	# create a coindience matrix, x[i,j]=1 if ith position is drug j
	x =matrix(0,n,n)# 初始化对比矩阵
	for(i in 1:n) x[i, perm[i]] = 1# 根据perm将1放在对应位置
	if(debug)print(cbind(x))
	# convert to vector
	if(base>0) z=c(x[-base,-base]) # 排除基线行和列
	else z=c(x)
	# add label
	lab=outer(LETTERS[1:n], 1:n, paste, sep="")# 创建标签
	lab=t(lab)
	if(base>0) lab=c(lab[-base,-base])
	names(z)=lab# 给z的元素命名
	z
}

# perm.contr1: 为给定排列 (perm) 构造一个零和对比矩阵 (zero-sum contrast)
perm.contr1=function(perm, base=n, debug=F, ...)
{ # construct a zero-sum contrast for a permutation, perm
	n=length(perm)
	# create a coindience matrix, x[i,j]=1 if ith position is drug j
	x =matrix(0,n,n); 
	for(i in 1:n) x[i, perm[i]] = 1# 构造一致性矩阵
	# convert x to a contrast matrix
	y = sweep(x, 1, x[,base])  # substract the base column
	z = sweep(y, 2, y[base,])  # substract the base row
	if(debug)print(cbind(x,y,z))
	# convert to vector
	z=c(z[-base,-base])# 排除基线行和列
	# add label
	lab=outer(LETTERS[1:n], 1:n, paste, sep="")
	lab=t(lab)
	lab=c(lab[-base,-base])
	names(z)=lab# 给z命名
	z
}

# perm.contr: 使用对比矩阵 (contrast) 生成排列的对比向量
perm.contr=function(perms, contr=perm.contr1, debug=F, ...)
{ # contr: contrasts used; 
	if(is.vector(perms)) contr(perms, debug=debug, ...)
	else t(apply(perms, 1, contr, ...))
}

# perm.pair2: 为给定排列 (perm) 构造成对对比 (pairwise contrast)
perm.pair2=function(perm, debug=F, ...)
{ # construct a pairwise contrast for a permutation, perm
	n=length(perm)
	x =matrix(0,n,n)
	for(i in 1:(n-1)) for(j in (i+1):n) # all pairs
	{ # extending the PWO model, using a function of (j-i)
		if(perm[i]<perm[j])  x[perm[i], perm[j]] = (j-i)
		else x[perm[j], perm[i]] = -(j-i)
		}	
	# convert x to a contrast matrix
	if(debug)print(x)
	z=x[row(x)<col(x)]  # 取上三角矩阵
		# add label
	lab=outer(1:n, 1:n, paste, sep=".")#	lab=t(lab)
	lab=c(lab[row(x)<col(x)])
	lab=paste("I",lab, sep="")
	names(z)=lab
	z
}

# perm.pairwise: 构造成对模型的对比 (sign 对比)
perm.pairwise=function(perm, debug=F, ...)
{ # PWO model
	sign(perm.pair2(perm, debug=debug, ...))
}

# cp.plot: 绘制组分-位置 (component-position) 效应图
cp.plot=function(ybar, perms, xlab="Position", ylab="Mean Response", pch=LETTERS[1:m], ...)
{ # make component-position effects plot 
# ybar: response, perms: COA(N,m)
#	perms=perms-min(perms)+1  # adjust levels to 1-m	
	m=ncol(perms)
	tab=matrix(0,m,m)
	for(i in 1:m) tab[i,] = xtabs(ybar~perms[,i], drop=T)# 统计每个位置的响应
	df=(tab/(nrow(perms)/m))  # 平均调整
	df.mean=mean(df)
	df=as.data.frame(df)
	matplot(1:m, df,type="o", xlab=xlab, ylab=ylab, pch=pch, ...)
	abline(h=df.mean)# 添加平均响应的水平线
}

# ex: 对比示例
ex=function()
{ # examples of contrasts
perm.contr1(c(2,1,3), base=3, debug=T)
perm.contr(c(2,1,3), contr=perm.contr1, debug=T)
perm.contr(c(2,1,3), contr=perm.base1, debug=T)
perm.contr(c(2,1,3), contr=perm.base1, base=0, debug=T)
perm.contr(c(2,1,3), contr=perm.pairwise, debug=T)
}

# 读取 4-drug 数据文件 'coa24x4c.txt'
## analysis of 4-drug data: coa24x4c.txt
dat0=read.table("coa24x4c.txt", h=T); dat0
dat0[,2:5]=dat0[,2:5]+1# 位置索引从1开始
perms=as.matrix(dat0[,2:5])  # 4 drugs

# 区分批次1和批次2
dat0$batch=ifelse(dat0$coa12=="*", 1, 2)
sub1=(1:24)[dat0$batch==1]# 批次1的索引
sub2=(1:24)[dat0$batch==2]

# 绘制组分-位置图
# draw component-position plots
par(mfrow=c(1,3))
pch=c("0","1", "2", "3") # LETTERS[1:4]# 设置不同符号
cp.plot(dat0$y, perms, pch=pch, main="Both", ylim=c(35,55))
cp.plot(dat0$y[sub1], perms[sub1,], pch=pch, main="COA1", ylim=c(35,55))
cp.plot(dat0$y[-sub1], perms[-sub1,], pch=pch, main="COA2", ylim=c(35,55))

# 创建不同模型的对比矩阵
## create variables for different models 
X0=perm.contr(perms, contr=perm.base1, base=0)  # CPM, baseline
# X0=perm.contr(perms, contr=perm.contr1, base=1)  # CPM, zero-sum
X1=perm.contr(perms, contr=perm.pairwise)  # PWO 
dat0=as.data.frame(cbind(dat0, X0, X1))

## select a batch (or both batches) for modeling 
dat=dat0; dat.test=dat0 # all data
# dat=dat0[sub1, ]; dat.test=dat0[sub2, ]; # fit batch 1, test batch 2
# dat=dat0[sub2, ]; dat.test=dat0[sub1, ]; # fit batch 2, test batch 1

## fit CP models
g.cpm=g=g1=lm(y ~ A1+B1+C1+A2+B2+C2+A3+B3+C3, dat); summary(g) # full model
g=g1=lm(y ~ 1, dat);  # stepwise
g=step(g1, scope =list(upper = ~(A1+B1+C1+D1+A2+B2+C2+D2+A3+B3+C3+D3+A4+B4+C4+D4), lower = ~1), direction="both", k=log(nrow(dat)), trace=0); summary(g)
g.cpm1=g=lm(formula = y ~ C4 + B1 + D1 + B2 , data = dat); summary(g); 

## fit PWO models
g.pwo=g1=g=lm(y ~ (I1.2 + I1.3 + I2.3 + I1.4 + I2.4 + I3.4), dat); summary(g1, cor=F)  #
g=g1=lm(y ~ 1, dat);  # stepwise
g=step(g1, scope =list(upper = ~(I1.2 + I1.3 + I2.3 + I1.4 + I2.4 + I3.4), lower = ~1), direction="both", k=log(nrow(dat)), trace=0); summary(g)
g.pwo1=g=lm(formula = y ~ I2.3 + I3.4 + I2.4, data = dat); summary(g); 

# compare models
out=rbind(cmp.g(g.cpm), cmp.g(g.cpm1),  cmp.g(g.pwo), cmp.g(g.pwo1))
dimnames(out)[[1]]=c("CPM", "CPM1",  "PWO","PWO1")
dimnames(out)[[2]]=c("p", "DF", "Rsquare", "R.Adj", "R2pred", "Sigma","AIC", "BIC", "Rmix", "Rtest")
round(out,2)

##### making residual plots for the full CP models
dat=dat0 # all data
par(mfrow=c(1,3))
# using all data
plot(resid(g)~fitted(g), xlab="Fitted values", ylab="Residuals", ylim=c(-6,10.5))

# using batch 1
g1=lm(y ~A1+B1+C1+A2+B2+C2+A3+B3+C3, dat, sub=sub1); summary(g1, cor=F)  # 
pred=predict(g1, dat); res = dat$y - pred
plot(res ~ pred, xlab="Fitted values", ylab="Residuals", ylim=c(-6,10.5))
cor(dat$y, pred)
cor(dat$y[-sub1], pred[-sub1]) 

# using batch 2
g2=lm(y ~A1+B1+C1+A2+B2+C2+A3+B3+C3, dat, sub=sub2); summary(g1, cor=F)  # 
pred=predict(g2, dat); res = dat$y - pred
plot(res ~ pred, xlab="Fitted values", ylab="Residuals", ylim=c(-6,10.5))
cor(dat$y, pred)
cor(dat$y[-sub2], pred[-sub2]) 

## check other residual plots
#par(mfrow=c(2,2)); plot(g.cpm, 1:4)

###############
## 5-drug data 
dat0=read.table("coa40x5b.txt", h=T)[,-1]; ## COA paper 
dat0
dat0$batch=rep(0:1, c(20,20)) # coded as 0 or 1
dimnames(dat0)[[2]][6]="y"  # rename ybar as y
perms=as.matrix(dat0[,1:5]+1)

## create variables for different models 
X0=perm.contr(perms, contr=perm.base1, base=0)  # CPM
#X0=perm.contr(perms, contr=perm.contr1)  # CPM
X1=perm.contr(perms, contr=perm.pairwise)  # PWO 
dat0=as.data.frame(cbind(dat0,X0, X1))

dat=dat0; dat.test=dat0
## fit CP models 
g.cpm=g1=g=lm(y ~batch+(A1+B1+C1+D1+A2+B2+C2+D2+A3+B3+C3+D3+A4+B4+C4+D4), dat); summary(g1, cor=F);   

g=g1=lm(y ~ 1, dat);  # summary(g)
g=step(g1, scope =list(upper = ~batch+(A1+B1+C1+D1+E1+A2+B2+C2+D2+E2
            +A3+B3+C3+D3+E3+A4+B4+C4+D4+E4+A5+B5+C5+D5+E5)^2, lower = ~1), 
       direction="both", k=2, trace=0); summary(g)
g.cpm2=g=lm(formula = y ~ batch + C1 + E1 + E2 + C2 + D1 + E3 + E1:C2 + 
          E2:D1 , data = dat); summary(g)
 
## fit PWO models
g.pwo=g1=g=lm(y ~ batch+(I1.2 + I1.3 + I2.3 + I1.4 + I2.4 + I3.4 + I1.5 + I2.5 + I3.5 + I4.5), dat); summary(g1, cor=F)  

g=g1=lm(y ~ 1, dat);  
g=step(g1, scope =list(upper = ~batch+(I1.2 + I1.3 + I2.3 + I1.4 + I2.4 + I3.4 + I1.5 + I2.5 + I3.5 + I4.5)^2, lower = ~1), data=dat, direction="both", k=2, trace=0); summary(g)  

g.pwo2=g=lm(formula = y ~ batch + I3.4 + I2.5 + I1.5 + I2.3 + I2.4 + I1.5:I2.3 + I2.5:I2.3 + I3.4:I2.4, data = dat); summary(g) 

# compare models
out=rbind(cmp.g(g.cpm), cmp.g(g.cpm2), cmp.g(g.pwo), cmp.g(g.pwo2))
dimnames(out)[[1]]=c("CPM", "CPM2", "PWO", "PWO2")
dimnames(out)[[2]]=c("p", "DF", "Rsquare", "R.Adj",  "R2pred","Sigma", "AIC", "BIC",  "Rmix", "Rtest")
round(out,2)

# Note: R2pred is often small or could be negative if p/n is close to 1

###### make residual and CP plots
par(mfrow=c(1,2))
g=g.cpm # full CP model
plot(resid(g) ~ fitted(g), xlab="Fitted values", ylab="Residuals")
cp.plot(dat$y, perms, pch=c("0","1", "2", "3", "4"))

## make predictions for all 120 sequences
perms5all=permutations.R(5)
Sequence5all=apply(perms5all-1,1, function(x) paste(x,sep="",collapse =""))
X0=perm.contr(perms5all, contr=perm.base1, base=0)  # CPM
X1=perm.contr(perms5all, contr=perm.pairwise)  # PWO 
df.X1= as.data.frame(cbind(batch=0,X0, X1)) # based on batch 1

## possible models
pred=cbind(cpm=predict(g.cpm,df.X1),cpm2=predict(g.cpm2,df.X1), pwo=predict(g.pwo,df.X1), pwo2=predict(g.pwo2,df.X1))
# pairs(pred)
round(cor(pred),2) # correlation between predictions  

res=as.data.frame(cbind(perms5all-1, Sequence5all, round(pred,2)));
ii=4 # model 1, 2, 3, or 4
res1=res[rev(order(pred[,ii])),][1:20,]; dimnames(res1)[[1]]=1:nrow(res1); res1 # top sequences

### perform Vuong's (1989) Tests of Non-Nested Models 
###
## Create a new function by modifying vuongtest() in library(nonnest2)
## because the aic and bic options in vuongtest() are not correct
##  library(nonnest2)
vuongtest.adj <- function (object1, object2, nested = FALSE, adj = "none", ll1 = llcont, ll2 = llcont, score1 = NULL, score2 = NULL, vc1 = vcov, vc2 = vcov) 
{ 
## adding testadj.factor=(1/sqrt(n)) /sqrt(omega.hat.2) for aic and bic
  obinfo <- check.obj(object1, object2)
  callA <- obinfo$callA
  classA <- obinfo$classA
  callB <- obinfo$callB
  classB <- obinfo$classB
  llA <- ll1(object1)
  llB <- ll2(object2)
  if (nested) {
    if (sum(llB, na.rm = TRUE) > sum(llA, na.rm = TRUE)) {
      tmp <- object1
      object1 <- object2
      object2 <- tmp
      tmp <- llA
      llA <- llB
      llB <- tmp
      tmp <- callA
      callA <- callB
      callB <- tmp
    }
  }
  nmis <- sum(is.na(llA))
  n <- NROW(llA) - nmis
  omega.hat.2 <- (n - 1)/n * var(llA - llB, na.rm = TRUE)
  lamstar <- calcLambda(object1, object2, n, score1, score2, 
                        vc1, vc2)
  pOmega <- imhof(n * omega.hat.2, lamstar^2)$Qq
  lr <- sum(llA - llB, na.rm = TRUE)
  teststat <- (1/sqrt(n)) * lr/sqrt(omega.hat.2)
  testadj.factor = (1/sqrt(n)) /sqrt(omega.hat.2)  # adjust factor for aic and bic
  if (classA %in% c("SingleGroupClass", "MultipleGroupClass")) {
    nparA <- mirt::extract.mirt(object1, "nest")
  }
  else {
    nparA <- length(coef(object1))
  }
  if (classB %in% c("SingleGroupClass", "MultipleGroupClass")) {
    nparB <- mirt::extract.mirt(object2, "nest")
  }
  else {
    nparB <- length(coef(object2))
  }
  if (adj == "aic") {
    teststat <- teststat - (nparA - nparB)*testadj.factor
  }
  if (adj == "bic") {
    teststat <- teststat - (nparA - nparB) * log(n)/2 *testadj.factor
  }
  if (nested) {
    teststat <- 2 * lr
    pLRTA <- imhof(teststat, -lamstar)[[1]]
    pLRTB <- NA
  }
  else {
    pLRTA <- pnorm(teststat, lower.tail = FALSE)
    pLRTB <- pnorm(teststat)
  }
  rval <- list(omega = omega.hat.2, p_omega = pOmega, LRTstat = teststat, 
               p_LRT = list(A = pLRTA, B = pLRTB), class = list(class1 = classA, 
                                                                class2 = classB), call = list(call1 = callA, call2 = callB), 
               nested = nested)
  class(rval) <- "vuongtest"
  return(rval)
}
environment(vuongtest.adj) <- asNamespace("nonnest2")

##  library(nonnest2)
## vuongtest(g.cpm1,g.pwo1, adj="bic") # the test is wrong
## 4-drug data
vuongtest.adj(g.cpm1,g.pwo1, adj="bic") # no difference, p= 0.4323
## 5-drug data
vuongtest.adj(g.cpm2,g.pwo2, adj="bic") # p= 0.138 favoring g.cpm2