来自 Kaplan Meir 的 IPD!请帮助这个菜鸟

IPD from Kaplan Meir! Pls help this rookie out

提问人:Wan Teller 提问时间:11/12/2023 最后编辑:LimeyWan Teller 更新时间:11/13/2023 访问量:23

问:

在过去的两天里,我一直在尝试首先从头开始理解 R,然后让这段代码运行。

我最初是从纸 https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3313891/#S1 中得到这个代码的。以 YouTube 视频为指导,我使用数字化仪提取坐标,按照该人的说法进行更改并尝试运行代码。

不会的!

我一直收到这个错误:** 错误 while ((n.hat[lower[i + 1]] > n.risk[i + 1]) ||((n.hat[lower[i + : 需要 TRUE/FALSE 的缺失值 ** 如果这里有专家,请告诉我我在哪里犯了错误。我正在粘贴代码,以及我用作数据集的 excel 文件。https://docs.google.com/spreadsheets/d/1qYxc5BtA-Ts3o2k5HpAkgseGnMiR5dGGNOp9NKQLGXk/edit?usp=sharing

1

#Algorithm to create a raw dataset from DigizeIt readings from a Kaplan-Meier curve
library("MASS")
library("splines")
library("survival")
###FUNCTION INPUTS


tot.events<-"NA" #tot.events = total no. of events reported. If not reported, then tot.events="NA"
arm.id<-1 #arm indicator

###END FUNCTION INPUTS
#Read in survival times read by digizeit
digizeit<- read.csv("1.csv", header = FALSE)
t.S<-digizeit[,1]
S<-digizeit[,2]

#Read in published numbers at risk, n.risk, at time, t.risk, lower and upper
# indexes for time interval

t.risk<- seq(0,56, by = 2)

lower<- purrr::map_dbl(t.risk,
                       function(x) min(which(t.S>=x)))
upper<- purrr::map_dbl(c(t.risk[-1], Inf),
                        function(x) max(which(t.S<x)))

n.risk<- c(108,103,86,78,67,59,51,50,38,36,33,32,30,30,29,29,27,26,26,24,23,21,21,20,19,19,17,16,13,11,9,9,8,8,8,8,8,7,7,7,5,5,5,5,5,5,5,5,4,3,3,2,1,1,1,0)





n.int<- length(n.risk)
n.t<- upper[n.int]






#Initialise vectors
arm<-rep(arm.id,n.risk[1])
n.censor<- rep(0,(n.int-1))
n.hat<- rep(n.risk[1]+1)
cen<- rep(1)
d<- rep(0)
KM.hat<- rep(1)
last.i<- rep(1,n.int)
sumdL<- 0


if (n.int > 1){
  #Time intervals 1,...,(n.int-1)
  for (i in 1:(n.int-1)){
    #First approximation of no. censored on interval i
    n.censor[i]<- round(n.risk[i]*S[lower[i+1]]/S[lower[i]]- n.risk[i+1])
    #Adjust tot. no. censored until n.hat = n.risk at start of interval (i+1)
    while((n.hat[lower[i+1]]>n.risk[i+1])||((n.hat[lower[i+1]]<n.risk[i+1])&&(n.censor[i]>0))){
      if (n.censor[i]<=0){
        cen[lower[i]:upper[i]]<-0
        n.censor[i]<-0
      }
      if (n.censor[i]>0){
        cen.t<-rep(0,n.censor[i])
        for (j in 1:n.censor[i]){
          cen.t[j]<- t.S[lower[i]] +
            j*(t.S[lower[(i+1)]]-t.S[lower[i]])/(n.censor[i]+1)
        }
        #Distribute censored observations evenly over time. Find no. censored on each time interval.
        cen[lower[i]:upper[i]]<-hist(cen.t,breaks=t.S[lower[i]:lower[(i+1)]],
                                     plot=F)$counts
      }
      #Find no. events and no. at risk on each interval to agree with K-M estimates read from curves
      n.hat[lower[i]]<-n.risk[i]
      last<-last.i[i]
      for (k in lower[i]:upper[i]){
        if (i==1 & k==lower[i]){
          d[k]<-0
          KM.hat[k]<-1
        }
        else {
          d[k]<-round(n.hat[k]*(1-(S[k]/KM.hat[last])))
          KM.hat[k]<-KM.hat[last]*(1-(d[k]/n.hat[k]))
        }
        n.hat[k+1]<-n.hat[k]-d[k]-cen[k]
        if (d[k] != 0) last<-k
      }
      n.censor[i]<- n.censor[i]+(n.hat[lower[i+1]]-n.risk[i+1])
    }
    if (n.hat[lower[i+1]]<n.risk[i+1]) n.risk[i+1]<-n.hat[lower[i+1]]
    last.i[(i+1)]<-last
  }
}


#Time interval n.int. 
if (n.int>1){
  #Assume same censor rate as average over previous time intervals.
  n.censor[n.int]<- min(round(sum(n.censor[1:(n.int-1)])*(t.S[upper[n.int]]-
                                                            t.S[lower[n.int]])/(t.S[upper[(n.int-1)]]-t.S[lower[1]])), n.risk[n.int])
}
if (n.int==1){n.censor[n.int]<-0}
if (n.censor[n.int] <= 0){
  cen[lower[n.int]:(upper[n.int]-1)]<-0
  n.censor[n.int]<-0
}
if (n.censor[n.int]>0){
  cen.t<-rep(0,n.censor[n.int])
  for (j in 1:n.censor[n.int]){
    cen.t[j]<- t.S[lower[n.int]] + 
      j*(t.S[upper[n.int]]-t.S[lower[n.int]])/(n.censor[n.int]+1)
  }
  cen[lower[n.int]:(upper[n.int]-1)]<-hist(cen.t,breaks=t.S[lower[n.int]:upper[n.int]],
                                           plot=F)$counts
}
#Find no. events and no. at risk on each interval to agree with K-M estimates read from curves
n.hat[lower[n.int]]<-n.risk[n.int]
last<-last.i[n.int]
for (k in lower[n.int]:upper[n.int]){
  if(KM.hat[last] !=0){
    d[k]<-round(n.hat[k]*(1-(S[k]/KM.hat[last])))} else {d[k]<-0}
  KM.hat[k]<-KM.hat[last]*(1-(d[k]/n.hat[k])) 
  n.hat[k+1]<-n.hat[k]-d[k]-cen[k]
  #No. at risk cannot be negative
  if (n.hat[k+1] < 0) {
    n.hat[k+1]<-0
    cen[k]<-n.hat[k] - d[k]
  }
  if (d[k] != 0) last<-k
}
#If total no. of events reported, adjust no. censored so that total no. of events agrees.
if (tot.events != "NA"){
  if (n.int>1){
    sumdL<-sum(d[1:upper[(n.int-1)]])
    #If total no. events already too big, then set events and censoring = 0 on all further time intervals
    if (sumdL >= tot.events){
      d[lower[n.int]:upper[n.int]]<- rep(0,(upper[n.int]-lower[n.int]+1))
      cen[lower[n.int]:(upper[n.int]-1)]<- rep(0,(upper[n.int]-lower[n.int]))
      n.hat[(lower[n.int]+1):(upper[n.int]+1)]<- rep(n.risk[n.int],(upper[n.int]+1-lower[n.int]))
    }
  }
  #Otherwise adjust no. censored to give correct total no. events
  if ((sumdL < tot.events)|| (n.int==1)){
    sumd<-sum(d[1:upper[n.int]])
    while ((sumd > tot.events)||((sumd< tot.events)&&(n.censor[n.int]>0))){
      n.censor[n.int]<- n.censor[n.int] + (sumd - tot.events)
      if (n.censor[n.int]<=0){
        cen[lower[n.int]:(upper[n.int]-1)]<-0
        n.censor[n.int]<-0
      }
      if (n.censor[n.int]>0){
        cen.t<-rep(0,n.censor[n.int])
        for (j in 1:n.censor[n.int]){
          cen.t[j]<- t.S[lower[n.int]] +
            j*(t.S[upper[n.int]]-t.S[lower[n.int]])/(n.censor[n.int]+1)
        }
        cen[lower[n.int]:(upper[n.int]-1)]<-hist(cen.t,breaks=t.S[lower[n.int]:upper[n.int]],
                                                 plot=F)$counts
      }
      n.hat[lower[n.int]]<-n.risk[n.int]
      last<-last.i[n.int]
      for (k in lower[n.int]:upper[n.int]){ 
        d[k]<-round(n.hat[k]*(1-(S[k]/KM.hat[last])))
        KM.hat[k]<-KM.hat[last]*(1-(d[k]/n.hat[k])) 
        if (k != upper[n.int]){
          n.hat[k+1]<-n.hat[k]-d[k]-cen[k]
          #No. at risk cannot be negative
          if (n.hat[k+1] < 0) {
            n.hat[k+1]<-0
            cen[k]<-n.hat[k] - d[k]
          }
        }
        if (d[k] != 0) last<-k
      }
      sumd<- sum(d[1:upper[n.int]])
    }
  }
}
write.table(matrix(c(t.S,n.hat[1:n.t],d,cen),ncol=4,byrow=F),paste(path,KMdatafile,sep=""),sep="\t")
### Now form IPD ###
#Initialise vectors
t.IPD<-rep(t.S[n.t],n.risk[1])
event.IPD<-rep(0,n.risk[1])
#Write event time and event indicator (=1) for each event, as separate row in t.IPD and event.IPD
k=1
for (j in 1:n.t){
  if(d[j]!=0){ 
    t.IPD[k:(k+d[j]-1)]<- rep(t.S[j],d[j])
    event.IPD[k:(k+d[j]-1)]<- rep(1,d[j])
    k<-k+d[j]
  }
}
#Write censor time and event indicator (=0) for each censor, as separate row in t.IPD and event.IPD
for (j in 1:(n.t-1)){
  if(cen[j]!=0){ 
    t.IPD[k:(k+cen[j]-1)]<- rep(((t.S[j]+t.S[j+1])/2),cen[j])
    event.IPD[k:(k+cen[j]-1)]<- rep(0,cen[j])
    k<-k+cen[j]
  }
}
#Output IPD
IPD<-matrix(c(t.IPD,event.IPD,arm),ncol=3,byrow=F)

IPD




我尝试使用不同的网站来解决此错误。但是我没有编程经验,所以不知道我做错了什么。

r 统计 概率 生存分析 医疗

评论


答: 暂无答案