# this file contains three methods of analysis: 1) the method proposed in Paik (2004), # 2) ordinary CLR ignoring the subjects with missing X, and 3) the proposed method # To run this code one first need to compile the Fortran program data_simusub.f # using the command # >f77 -02 -c data_simusub.f #in unix station, and then load the # subprogram in the shared library of R using the command # >R CMD SHLIB data_simusub.o # library(survival) dyn.load("data_simusub.so") a=read.table("LAcancer.txt") age=(a[, 3]-min(a[, 3]))/(max(a[, 3])-min(a[, 3]))# matching variable # the 6th col of a represents the exposure variable which has missing values. # the missing values are denoted by 9. # the 4th col represents the exposure variable which is always observed. matchcc=data.frame(a[, 2], age, a[, 4], a[, 6]) r=rep(0, 315) a6=a[, 6] r[a6!=9]<-1 ik=rep(0, 5) ncase=63; # this represents the number of matched sets. indx=NULL for( i in 1: ncase){ vec=NULL for( l in 1: 5){ ik[l]=5*(i-1)+l if(r[ik[l]]==0) vec=c(vec, ik[l]) } if((r[ik[1]]==0) ||(sum(r[ik[1]:ik[5]])==0)) {indx=c(indx, ik[1], ik[2], ik[3], ik[4], ik[5])} else {indx=c(indx, vec)} } orgmatchcc=cbind(matchcc, r) paikbeta=NULL matchcc=orgmatchcc matchcc=as.matrix(matchcc) storage.mode(matchcc)<-"double" nr=nrow(matchcc); nc=ncol(matchcc) ndthet=9; theta=rep(0.001, ndthet); theta_old=theta; lglk=0; lglk=as.double(lglk); #####theta_x is fixed to certain values h=function(theta) { fn1=.Fortran("newpaikstage1", matchcc, nrow=as.integer(nr), ncol=as.integer(nc), theta=as.double(theta), theta_old=as.double(theta_old), ndthet=as.integer(ndthet), output=lglk) #theta=c(delta0, deltad, deltas, deltax, deltaz, gamma0, gammad, gammas, gammaz) -fn1$output } eps=1.0 while(eps>0.01){ theta=theta_old out=nlm(h, theta) theta_old=out$estimate print(theta_old) out$minimum eps=sum(abs((theta-theta_old)/theta)) } theta=theta_old ############################################### ##### Capital phi term1=theta[1]+theta[2]+theta[3]*matchcc[, 2]+ theta[4]*matchcc[, 4]+theta[5]*matchcc[, 3] term0=theta[1]+theta[3]*matchcc[, 2]+ theta[4]*matchcc[, 4]+theta[5]*matchcc[, 3] capphi=log(exp(term1)/(1+exp(term1)))-log(exp(term0)/(1+exp(term0))) ########## Small phi q1=theta[1]+theta[2]+theta[3]*matchcc[, 2]+ theta[4]+theta[5]*matchcc[, 3] q0=theta[1]+theta[2]+theta[3]*matchcc[, 2]+theta[5]*matchcc[, 3] qx=theta[6]+theta[7]+theta[8]*matchcc[, 2]+theta[9]*matchcc[, 3] newq1=theta[1]+theta[3]*matchcc[, 2]+ theta[4]+theta[5]*matchcc[, 3] newq0=theta[1]+theta[3]*matchcc[, 2]+theta[5]*matchcc[, 3] newqx=theta[6]+theta[8]*matchcc[, 2]+theta[9]*matchcc[, 3] smallphi=log(exp(qx)/((1+exp(q1))*(1+exp(qx)))+1/ ((1+exp(q0))*(1+exp(qx))))-log(exp(newqx)/((1+exp(newq1))* (1+exp(newqx))) +1/((1+exp(newq0))*(1+exp(newqx)))) offset=matchcc[, 5]*capphi+(1-matchcc[, 5])*smallphi xstar=(log(1+exp(theta[6]+theta[7]+theta[8]*matchcc[,2]+ theta[9]*matchcc[, 3]))- log(1+exp(theta[6]+theta[8]*matchcc[,2]+ theta[9]*matchcc[, 3])))/theta[6] xhat=matchcc[, 4]*matchcc[, 5]+(1-matchcc[, 5])*xstar fn2=function(beta){ qn=beta[1]*matchcc[, 3]+beta[2]*xhat+offset qn1=qn[seq(1, 315, 5)]#qn1=qn[seq(1, 315, 5)] qn2=exp(qn[seq(1, 315, 5)])+exp(qn[seq(2, 315, 5)])+ exp(qn[seq(3, 315, 5)])+exp(qn[seq(4, 315, 5)])+exp(qn[seq(5, 315, 5)]) -sum(qn1)+sum(log(qn2)) } beta=rep(0.001, 2) out=nlm(fn2, beta) beta=out$estimate print(beta) #paikbeta=rbind(paikbeta, beta)} # Missing data CLR newmatchcc=matchcc[-indx,] y1=newmatchcc[,1] st=NULL k=0 for( i in 1: length(y1)) { if(y1[i]!=0) {k=k+1; st=c(st,k)} else {st=c(st, k)} } s=clogit(newmatchcc[, 1]~newmatchcc[, 3]+newmatchcc[, 4]+strata(st)) betam=s$coefficients#) } ###################### End of Paik's method # Following is the proposed method which is described in Sinha and Maiti (2007), abbreviated as SM smfn=function(alpha){ hn3=.Fortran("newsm", matchcc, nrow=as.integer(nr), ncol=as.integer(nc), alpha=as.double(alpha), alpha_old=as.double(alpha_old), ndalpa=as.integer(ndalpa),output=lglk) -hn3$output } ndalpa=10 alpha_old=rep(0.01, ndalpa); eps=1. while(eps>0.01){ alpha=alpha_old outsm=nlm(smfn, alpha) alpha_old=outsm$estimate eps=sum(abs((alpha-alpha_old)/alpha)) print(alpha_old) } betamatsm=rbind(betamatsm, alpha[9:10]) # End of SM