library(ROCR) library(scatterplot3d) library(rgl) library(SIS) set.seed(2015) v_x=c(0:9) v_y=c(0:9) v_z=c(0:9) v=expand.grid(v_x,v_y,v_z) N = 1000 p = 1000 beta=rep(0,p) beta_nonzero=which(v$Var1<8 & v$Var1>2 & v$Var2<8 & v$Var2>2 & v$Var3<8 & v$Var3>2 ) beta[beta_nonzero]=1 beta=matrix(beta, ncol=1) x<-matrix(rnorm(N*p, mean = 0, sd = 2), nrow=N, ncol=p) #### generate Y##### z = x%*% beta pr = 1/(1+exp(-z)) y=ifelse(pr<0.5,0,1) #y=rbinom(200,1,pr) table(y) ##################### #### model fitting### ##################### ##### glm ##### N_fit=800 y<-matrix(y,ncol=1) colnames(y)<-"y" colnames(x)<-paste('x',1:p) dat=data.frame(cbind(y,x)) ###perfomance ranking by AUC### #auc=c(0,rep=1000) acc=c(0,rep=p) for (i in 1:p){ xi=dat[1:N_fit,c(1,i+1)] glm.model=glm(y~.+0, data=xi,family="binomial") glm.pred=predict(glm.model, newdata=dat[(N_fit+1):N,],type="response",se.fit=T) acc[i] = mean((glm.pred$fit>=0.5)==y[(N_fit+1):N,]) } sort_res = sort(acc, method = "qu", decreasing = TRUE,index=TRUE) ##################################### ###Model 1: voxel 368 and 26 nbors### ##################################### rule=0.02 start=sort_res$ix[1] #start point int1=length(v_x)^0 int2=length(v_x)^1 int3=length(v_x)^2 nbor1=c(start+int1,start-int1, start+int2, start-int2,start+int3,start-int3, start+int1+int2, start+int1-int2, start-int1+int2, start-int1-int2, start+int1+int3, start+int1-int3, start-int1+int3, start-int1-int3, start+int3+int2, start+int3-int2, start-int3+int2, start-int3-int2, start+int1+int2+int3, start+int1-int2+int3, start-int1+int2+int3, start-int1-int2+int3, start+int1+int2-int3, start+int1-int2-int3, start-int1+int2-int3, start-int1-int2-int3 ) nbor1=c(start,nbor1[which(nbor1<1000 &nbor1>0)]) plot=scatterplot3d((v[beta_nonzero,]),xlim=c(0,9),ylim=c(0,9),zlim=c(0,9)) plot$points3d(v[start,],pch=16,col='red') plot$points3d(v[368,],pch=16,col='red') ######LOOP############# dat1=dat[1:N_fit, c(1,nbor1+1)] glm.model1=glm(y~.+0, data=dat1,family="binomial") glm.pred1=predict(glm.model1, newdata=dat[(N_fit+1):N,c(1,nbor1+1)],type="response",se.fit=T) acc1 = mean((glm.pred1$fit>=0.5)==y[(N_fit+1):N,]) glm.model1_beta=glm.model1 acc1_beta=acc1 nbor1_temp=numeric() excld=numeric() for ( i in 1:16){ nbor1_ex=nbor1_temp acc1_ex=acc1_beta #print(i) beta_est1=summary(glm.model1_beta)$coef[,1] beta_min1=beta_est1[order(abs(beta_est1))][1] selected1=names(beta_min1) min_idx1=as.numeric(sub("x.","",selected1)) excld=c(excld,min_idx1) nbor1_temp=nbor1[which(is.element(nbor1,excld)==FALSE)] dat1_temp=dat[1:N_fit, c(1, nbor1_temp+1)] glm.model1_beta=glm(y~.+0, data=dat1_temp,control=list(maxit=200),family="binomial") glm.pred1_beta=predict(glm.model1_beta, newdata=dat[(N_fit+1):N,c(1,nbor1_temp+1)],type="response",se.fit=T) acc1_beta = mean((glm.pred1_beta$fit>=0.5)==y[(N_fit+1):N,]) diff=acc1_beta-acc1_ex if (diff < -rule) { break } #print(diff) #if (diff > 0.011) break print(acc1_beta) } ##################################### ###Model 2 #### ##################################### #indices# nbor2=nbor1_ex for (i in 1: length(nbor1_ex)){ start2=nbor1_ex[i] ind9=!(is.element(v[start2,],9)) # don't plus ind0=!(is.element(v[start2,],0)) # don't minus nbor2=c(nbor2,c(start2+int1*ind9[1],start2-int1*ind0[1], start2+int2*ind9[2], start2-int2*ind0[2],start2+int3*ind9[3],start2-int3*ind0[3], start2+int1*ind9[1]+int2*ind9[2], start2+int1*ind9[1]-int2*ind0[2], start2-int1*ind9[1]+int2*ind9[2], start2-int1*ind0[1]-int2*ind0[2], start2+int1*ind9[1]+int3*ind9[3], start2+int1*ind9[1]-int3*ind0[3], start2-int1*ind0[1]+int3*ind9[3], start2-int1*ind0[1]-int3*ind0[3], start2+int3*ind9[3]+int2*ind9[2], start2+int3*ind9[1]-int2*ind0[2], start2-int3*ind0[3]+int2*ind9[2], start2-int3*ind9[2]-int2*ind0[2], start2+int1*ind9[1]+int2*ind9[2]+int3*ind9[3], start2+int1*ind9[1]-int2*ind0[2]+int3*ind9[3], start2-int1*ind0[1]+int2*ind9[2]+int3*ind9[3], start2-int1*ind0[1]-int2*ind0[2]+int3*ind9[3], start2+int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start2+int1*ind9[1]-int2*ind0[2]-int3*ind0[3], start2-int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start2-int1*ind0[1]-int2*ind0[2]-int3*ind0[3] ) ) } nbor2=unique(nbor2[which(nbor2<1000 &nbor2>0)]) plot=scatterplot3d((v[beta_nonzero,]),xlim=c(0,9),ylim=c(0,9),zlim=c(0,9)) plot$points3d(v[nbor2,],col='blue',pch=16) dat2=dat[1:N_fit, c(1, nbor2+1)] glm.model2=glm(y~.+0, data=dat2,family="binomial") glm.pred2=predict(glm.model2, newdata=dat[(N_fit+1):N,c(1, nbor2+1)],type="response",se.fit=T) acc2 = mean((glm.pred2$fit>=0.5)==y[(N_fit+1):N,]) ###LOOP### glm.model2_beta=glm.model2 acc2_beta=acc2 excld=numeric() nbor2_temp=numeric() for ( i in 1:100){ acc2_ex=acc2_beta nbor2_ex=nbor2_temp #print(i) beta_est2=summary(glm.model2_beta)$coef[,1] beta_min2=beta_est2[order(abs(beta_est2))][1] selected2=names(beta_min2) min_idx2=as.numeric(sub("x.","",selected2)) #print(min_idx2) excld=c(excld,min_idx2) nbor2_temp=nbor2[which(is.element(nbor2,excld)==FALSE)] dat2_temp=dat[1:N_fit, c(1, nbor2_temp+1)] glm.model2_beta=glm(y~.+0, data=dat2_temp,control=list(maxit=200),family="binomial") glm.pred2_beta=predict(glm.model2_beta, newdata=dat[(N_fit+1):N,c(1,nbor2_temp+1)],type="response",se.fit=T) acc2_beta = mean((glm.pred2_beta$fit>=0.5)==y[(N_fit+1):N,]) diff=acc2_beta-acc2_ex if (diff< -rule){ break } #print(diff) #if (diff > 0.011) break #print(acc2_beta) } plot=scatterplot3d((v[beta_nonzero,]),xlim=c(0,9),ylim=c(0,9),zlim=c(0,9)) plot$points3d(v[nbor2_ex,],pch=16,col='blue') plot$points3d(v[beta_nonzero,],pch=16,col='red') ##################################### ###### Model 3####### ##################################### nbor3=nbor2_ex for (i in 1: length(nbor2_ex)){ start3=nbor2_ex[i] ind9=!(is.element(v[start3,],9)) # don't plus ind0=!(is.element(v[start3,],0)) # don't minus nbor3=c(nbor3,c(start3+int1*ind9[1],start3-int1*ind0[1], start3+int2*ind9[2], start3-int2*ind0[2],start3+int3*ind9[3],start3-int3*ind0[3], start3+int1*ind9[1]+int2*ind9[2], start3+int1*ind9[1]-int2*ind0[2], start3-int1*ind9[1]+int2*ind9[2], start3-int1*ind0[1]-int2*ind0[2], start3+int1*ind9[1]+int3*ind9[3], start3+int1*ind9[1]-int3*ind0[3], start3-int1*ind0[1]+int3*ind9[3], start3-int1*ind0[1]-int3*ind0[3], start3+int3*ind9[3]+int2*ind9[2], start3+int3*ind9[1]-int2*ind0[2], start3-int3*ind0[3]+int2*ind9[2], start3-int3*ind9[2]-int2*ind0[2], start3+int1*ind9[1]+int2*ind9[2]+int3*ind9[3], start3+int1*ind9[1]-int2*ind0[2]+int3*ind9[3], start3-int1*ind0[1]+int2*ind9[2]+int3*ind9[3], start3-int1*ind0[1]-int2*ind0[2]+int3*ind9[3], start3+int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start3+int1*ind9[1]-int2*ind0[2]-int3*ind0[3], start3-int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start3-int1*ind0[1]-int2*ind0[2]-int3*ind0[3] ) ) } nbor3=unique(nbor3[which(nbor3<1000 &nbor3>0)]) #111 plot=scatterplot3d((v[beta_nonzero,]),xlim=c(0,9),ylim=c(0,9),zlim=c(0,9)) plot$points3d(v[nbor3,],col='red',pch=16) plot$points3d(v[start3,],col='red',pch=16) dat3=dat[1:N_fit, c(1, nbor3+1)] glm.model3=glm(y~.+0, data=dat3,family="binomial",control=list(maxit=200)) glm.pred3=predict(glm.model3, newdata=dat[(N_fit+1):N,c(1, nbor3+1)],type="response",se.fit=T) acc3 = mean((glm.pred3$fit>=0.5)==y[(N_fit+1):N,]) ######LOOP####### glm.model3_beta=glm.model3 excld=numeric() acc3_beta=acc3 nbor3_temp=numeric() for ( i in 1:292){ nbor3_ex=nbor3_temp acc3_ex=acc3_beta nbor3_ex=nbor3_temp #print(i) beta_est3=summary(glm.model3_beta)$coef[,1] beta_min3=beta_est3[order(abs(beta_est3))][1] selected3=names(beta_min3) min_idx3=as.numeric(sub("x.","",selected3)) #print(min_idx3) excld=c(excld,min_idx3) nbor3_temp=nbor3[which(is.element(nbor3,excld)==FALSE)] dat3_temp=dat[1:N_fit, c(1, nbor3_temp+1)] glm.model3_beta=glm(y~.+0, data=dat3_temp,family="binomial",control=list(maxit=200)) glm.pred3_beta=predict(glm.model3_beta, newdata=dat[(N_fit+1):N,c(1,nbor3_temp+1)],type="response",se.fit=T) acc3_beta = mean((glm.pred3_beta$fit>=0.5)==y[(N_fit+1):N,]) diff=acc3_beta-acc3_ex if (diff < -rule) { print(i) break } #print(diff) #print(acc3_beta) } ##################################### ##### Model 4##### ##################################### nbor4=nbor3_ex for (i in 1: length(nbor3_ex)){ start4=nbor3_ex[i] ind9=!(is.element(v[start4,],9)) # don't plus ind0=!(is.element(v[start4,],0)) # don't minus nbor4=c(nbor4,c(start4+int1*ind9[1],start4-int1*ind0[1], start4+int2*ind9[2], start4-int2*ind0[2],start4+int3*ind9[3],start4-int3*ind0[3], start4+int1*ind9[1]+int2*ind9[2], start4+int1*ind9[1]-int2*ind0[2], start4-int1*ind9[1]+int2*ind9[2], start4-int1*ind0[1]-int2*ind0[2], start4+int1*ind9[1]+int3*ind9[3], start4+int1*ind9[1]-int3*ind0[3], start4-int1*ind0[1]+int3*ind9[3], start4-int1*ind0[1]-int3*ind0[3], start4+int3*ind9[3]+int2*ind9[2], start4+int3*ind9[1]-int2*ind0[2], start4-int3*ind0[3]+int2*ind9[2], start4-int3*ind9[2]-int2*ind0[2], start4+int1*ind9[1]+int2*ind9[2]+int3*ind9[3], start4+int1*ind9[1]-int2*ind0[2]+int3*ind9[3], start4-int1*ind0[1]+int2*ind9[2]+int3*ind9[3], start4-int1*ind0[1]-int2*ind0[2]+int3*ind9[3], start4+int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start4+int1*ind9[1]-int2*ind0[2]-int3*ind0[3], start4-int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start4-int1*ind0[1]-int2*ind0[2]-int3*ind0[3] ) ) } nbor4=unique(nbor4[which(nbor4<1000 &nbor4>0)]) #111 plot=scatterplot3d((v[beta_nonzero,]),xlim=c(0,9),ylim=c(0,9),zlim=c(0,9),pch=16) plot$points3d(v[nbor4,],col='red',pch=16) plot$points3d(v[nbor3_ex,],col='red',pch=16) dat4=dat[1:N_fit, c(1, nbor4+1)] glm.model4=glm(y~.+0, data=dat4,family="binomial",control=list(maxit=200)) glm.pred4=predict(glm.model4, newdata=dat[(N_fit+1):N,c(1,nbor4+1)],type="response",se.fit=T) acc4 = mean((glm.pred4$fit>=0.5)==y[(N_fit+1):N,]) glm.model4_beta=glm.model4 acc4_beta=acc4 excld=numeric() nbor4_temp=numeric() for ( i in 1:200){ nbor4_ex=nbor4_temp acc4_ex=acc4_beta #print(i) beta_est4=summary(glm.model4_beta)$coef[,1] beta_min4=beta_est4[order(abs(beta_est4))][1] selected4=names(beta_min4) min_idx4=as.numeric(sub("x.","",selected4)) #print(min_idx4) excld=c(excld,min_idx4) nbor4_temp=nbor4[which(is.element(nbor4,excld)==FALSE)] dat4_temp=dat[1:N_fit, c(1, nbor4_temp+1)] glm.model4_beta=glm(y~.+0, data=dat4_temp,control=list(maxit=200),family="binomial") glm.pred4_beta=predict(glm.model4_beta, newdata=dat[(N_fit+1):N,c(1,nbor4_temp+1)],type="response",se.fit=T) acc4_beta = mean((glm.pred4_beta$fit>=0.5)==y[(N_fit+1):N,]) diff=acc4_beta-acc4_ex #print(diff) if (diff < -0.02) { print(i) break } #print(acc4_beta) } ####MODEL5#### nbor5=nbor4_ex for (i in 1: length(nbor4_ex)){ start5=nbor4_ex[i] ind9=!(is.element(v[start5,],9)) # don't plus ind0=!(is.element(v[start5,],0)) # don't minus nbor5=c(nbor5,c(start5+int1*ind9[1],start5-int1*ind0[1], start5+int2*ind9[2], start5-int2*ind0[2],start5+int3*ind9[3],start5-int3*ind0[3], start5+int1*ind9[1]+int2*ind9[2], start5+int1*ind9[1]-int2*ind0[2], start5-int1*ind9[1]+int2*ind9[2], start5-int1*ind0[1]-int2*ind0[2], start5+int1*ind9[1]+int3*ind9[3], start5+int1*ind9[1]-int3*ind0[3], start5-int1*ind0[1]+int3*ind9[3], start5-int1*ind0[1]-int3*ind0[3], start5+int3*ind9[3]+int2*ind9[2], start5+int3*ind9[1]-int2*ind0[2], start5-int3*ind0[3]+int2*ind9[2], start5-int3*ind9[2]-int2*ind0[2], start5+int1*ind9[1]+int2*ind9[2]+int3*ind9[3], start5+int1*ind9[1]-int2*ind0[2]+int3*ind9[3], start5-int1*ind0[1]+int2*ind9[2]+int3*ind9[3], start5-int1*ind0[1]-int2*ind0[2]+int3*ind9[3], start5+int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start5+int1*ind9[1]-int2*ind0[2]-int3*ind0[3], start5-int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start5-int1*ind0[1]-int2*ind0[2]-int3*ind0[3] ) ) } nbor5=unique(nbor5[which(nbor5<1000 &nbor5>0)]) #111 plot=scatterplot3d((v[beta_nonzero,]),xlim=c(0,9),ylim=c(0,9),zlim=c(0,9),pch=16) plot$points3d(v[nbor4_ex,],col='blue',pch=16) dat5=dat[1:N_fit, c(1, nbor5+1)] glm.model5=glm(y~.+0, data=dat5,family="binomial",control=list(maxit=200)) glm.pred5=predict(glm.model5, newdata=dat[(N_fit+1):N,c(1,nbor5+1)],type="response",se.fit=T) acc5 = mean((glm.pred5$fit>=0.5)==y[(N_fit+1):N,]) glm.model5_beta=glm.model5 acc5_beta=acc5 excld=numeric() nbor5_temp=numeric() for ( i in 1:100){ acc5_ex=acc5_beta nbor5_ex=nbor5_temp #print(i) beta_est5=summary(glm.model5_beta)$coef[,1] beta_min5=beta_est5[order(abs(beta_est5))][1] selected5=names(beta_min5) min_idx5=as.numeric(sub("x.","",selected5)) #print(min_idx5) excld=c(excld,min_idx5) nbor5_temp=nbor5[which(is.element(nbor5,excld)==FALSE)] dat5_temp=dat[1:N_fit, c(1, nbor5_temp+1)] glm.model5_beta=glm(y~.+0, data=dat5_temp,control=list(maxit=200),family="binomial") glm.pred5_beta=predict(glm.model5_beta, newdata=dat[(N_fit+1):N,c(1,nbor5_temp+1)],type="response",se.fit=T) acc5_beta = mean((glm.pred5_beta$fit>=0.5)==y[(N_fit+1):N,]) diff=acc5_beta-acc5_ex if (diff < -rule) { print(i) break } #print(acc5_beta) } #######MODEL 6######### nbor6=nbor5_ex for (i in 1: length(nbor5_ex)){ start6=nbor5_ex[i] ind9=!(is.element(v[start6,],9)) # don't plus ind0=!(is.element(v[start6,],0)) # don't minus nbor6=c(nbor6,c(start6+int1*ind9[1],start6-int1*ind0[1], start6+int2*ind9[2], start6-int2*ind0[2],start6+int3*ind9[3],start6-int3*ind0[3], start6+int1*ind9[1]+int2*ind9[2], start6+int1*ind9[1]-int2*ind0[2], start6-int1*ind9[1]+int2*ind9[2], start6-int1*ind0[1]-int2*ind0[2], start6+int1*ind9[1]+int3*ind9[3], start6+int1*ind9[1]-int3*ind0[3], start6-int1*ind0[1]+int3*ind9[3], start6-int1*ind0[1]-int3*ind0[3], start6+int3*ind9[3]+int2*ind9[2], start6+int3*ind9[1]-int2*ind0[2], start6-int3*ind0[3]+int2*ind9[2], start6-int3*ind9[2]-int2*ind0[2], start6+int1*ind9[1]+int2*ind9[2]+int3*ind9[3], start6+int1*ind9[1]-int2*ind0[2]+int3*ind9[3], start6-int1*ind0[1]+int2*ind9[2]+int3*ind9[3], start6-int1*ind0[1]-int2*ind0[2]+int3*ind9[3], start6+int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start6+int1*ind9[1]-int2*ind0[2]-int3*ind0[3], start6-int1*ind9[1]+int2*ind9[2]-int3*ind0[3], start6-int1*ind0[1]-int2*ind0[2]-int3*ind0[3] ) ) } nbor6=unique(nbor6[which(nbor6<1000 &nbor6>0)]) #111 plot=scatterplot3d((v[beta_nonzero,]),xlim=c(0,9),ylim=c(0,9),zlim=c(0,9),pch=16) plot$points3d(v[nbor4_ex,],col='blue',pch=16) dat6=dat[1:N_fit, c(1, nbor6+1)] glm.model6=glm(y~.+0, data=dat6,family="binomial",control=list(maxit=200)) glm.pred6=predict(glm.model6, newdata=dat[(N_fit+1):N,c(1,nbor6+1)],type="response",se.fit=T) acc6 = mean((glm.pred6$fit>=0.5)==y[(N_fit+1):N,]) glm.model6_beta=glm.model6 acc6_beta=acc6 excld=numeric() nbor6_temp=numeric() for ( i in 1:100){ acc6_ex=acc6_beta nbor6_temp=nbor6_temp #print(i) beta_est6=summary(glm.model6_beta)$coef[,1] beta_min6=beta_est6[order(abs(beta_est6))][1] selected6=names(beta_min6) min_idx6=as.numeric(sub("x.","",selected6)) #print(min_idx6) excld=c(excld,min_idx6) nbor6_temp=nbor6[which(is.element(nbor6,excld)==FALSE)] dat6_temp=dat[1:N_fit, c(1, nbor6_temp+1)] glm.model6_beta=glm(y~.+0, data=dat6_temp,control=list(maxit=200),family="binomial") glm.pred6_beta=predict(glm.model6_beta, newdata=dat[(N_fit+1):N,c(1,nbor6_temp+1)],type="response",se.fit=T) acc6_beta = mean((glm.pred6_beta$fit>=0.5)==y[(N_fit+1):N,]) diff=acc6_beta-acc6_ex if (diff < -rule) { print(i) break } #print(acc6_beta) } #######################Scratch########################################### dat_true=dat[1:N_fit, c(1, beta_nonzero+1)] glm.modeltrue=glm(y~., data=dat_true,family="binomial",control=list(maxit=200)) glm.predtrue=predict(glm.modeltrue, newdata=dat[(N_fit+1):N,c(1, beta_nonzero+1)],type="response",se.fit=T) mean((glm.predtrue$fit>=0.5)==y[(N_fit+1):N,]) junk<-ifelse(glm.predtrue$fit>=0.5,1,0) table(abs(junk-y[(N_fit+1):N,])