R：[0,1] 之外的 x 'probs'-解网

问：

我正在使用 R 编程语言。

我正在尝试按照 R （https://cran.r-project.org/web/packages/optimization/optimization.pdf）中“优化”包中的说明进行操作，并在特定函数上使用此包中的函数。

在这个例子中，我首先生成一些随机数据：

#load libraries
library(dplyr)


# create some data for this example
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,5)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)

从这里开始，我定义了要优化的函数（“fitness”）。此函数接受 7 个输入并计算“总”平均值（单个标量值）。此功能所需的输入包括：

“random_1”（介于 80 和 120 之间）
“random_2”（介于“random_1”和 120 之间）
“random_3”（介于 85 和 120 之间）
“random_4”（介于 random_2 和 120 之间）
“split_1”（介于 0 和 1 之间）
“split_2”（介于 0 和 1 之间）
“split_3”（介于 0 和 1 之间）

要优化的功能定义如下：

library(optimization)

fitness <- function(x) {
  #bin data according to random criteria
  train_data <- train_data %>% 
                 mutate(cat = ifelse(a1 <= x[1] & b1 <= x[3], "a", 
                               ifelse(a1 <= x[2] & b1 <= x[4], "b", "c")))

 train_data$cat = as.factor(train_data$cat)
    
    #new splits
    a_table = train_data %>%
        filter(cat == "a") %>%
        select(a1, b1, c1, cat)
    
    b_table = train_data %>%
        filter(cat == "b") %>%
        select(a1, b1, c1, cat)
    
    c_table = train_data %>%
        filter(cat == "c") %>%
        select(a1, b1, c1, cat)
    
   
    
    #calculate  quantile ("quant") for each bin
    
    table_a = data.frame(a_table%>% group_by(cat) %>%
                             mutate(quant = quantile(c1, prob = x[5])))
    
    table_b = data.frame(b_table%>% group_by(cat) %>%
                             mutate(quant = quantile(c1, prob = x[6])))
    
    table_c = data.frame(c_table%>% group_by(cat) %>%
                             mutate(quant = quantile(c1, prob = x[7])))
    
    
    
    
    #create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
    table_a$diff = ifelse(table_a$quant > table_a$c1,1,0)
    table_b$diff = ifelse(table_b$quant > table_b$c1,1,0)
    table_c$diff = ifelse(table_c$quant > table_c$c1,1,0)
    
    #group all tables
    
    final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized
    mean = mean(final_table$diff)
    
    
}

从这里，我尝试运行以下优化函数：

Output <- optim_nm(fitness, k = 7, trace = TRUE)

plot(output)
 plot(Output, 'contour')

但这些返回以下错误：

 Error: Problem with `mutate()` column `quant`.
i `quant = quantile(c1, prob = x[6])`.
x 'probs' outside [0,1]
Run `rlang::last_error()` to see where the error occurred. 

Error in plot(Output) : object 'Output' not found

我认为错误是“split_1”、“split_2”和“split_3”变量被分配了 0 和 1 之外的值：由于该函数使用这些变量来计算百分位数（例如），这样自然会导致错误吗？quant = quantile(c1, prob = x[5]

我尝试使用此包中的另一种优化算法，其中明确定义了这 7 个输入的范围，但这也产生了相同的错误：

ro_sa <- optim_sa(fun = fitness,
start = c(runif(7, min = -1, max = 1)),
lower = c(80,80,80,80,0,0,0),
upper = c(120,120,120,120,1,1,1),
trace = TRUE,
control = list(t0 = 100,
nlimit = 550,
t_min = 0.1,
dyn_rf = FALSE,
rf = 1,
r = 0.7
)
)

 Error: Problem with `mutate()` column `quant`.
i `quant = quantile(c1, prob = x[6])`.
x 'probs' outside [0,1]

如果您提供初始起点，这也不起作用：

optim_nm(fitness, start = c(80,80,80,80,0.5,0.6,0.7))

 Error: Problem with `mutate()` column `quant`.
i `quant = quantile(c1, prob = x[5])`.
x 'probs' outside [0,1]
i The error occurred in group 1: cat = a.

问题：有人可以告诉我如何解决这个问题，以便我可以运行优化功能吗？

#desired functions to run:

Output <- optim_nm(fitness, k = 7, trace = TRUE)

plot(output)
 plot(Output, 'contour')

ro_sa <- optim_sa(fun = fitness,
start = c(runif(7, min = -1, max = 1)),
lower = c(80,80,80,80,0,0,0),
upper = c(120,120,120,120,1,1,1),
trace = TRUE,
control = list(t0 = 100,
nlimit = 550,
t_min = 0.1,
dyn_rf = FALSE,
rf = 1,
r = 0.7
)
)

optim_nm(fitness, start = c(80,80,80,80,0.5,0.6,0.7))

谢谢

R 函数优化 DPLYR 数据操作

library(optimization)

fitness <- function(x) {
  #bin data according to random criteria
  train_data <- train_data %>% 
    mutate(cat = ifelse(a1 <= x[1] & b1 <= x[3], "a", 
                        ifelse(a1 <= x[2] & b1 <= x[4], "b", "c")))
  
  train_data$cat = as.factor(train_data$cat)
  
  #new splits
  a_table = train_data %>%
    filter(cat == "a") %>%
    select(a1, b1, c1, cat)
  
  b_table = train_data %>%
    filter(cat == "b") %>%
    select(a1, b1, c1, cat)
  
  c_table = train_data %>%
    filter(cat == "c") %>%
    select(a1, b1, c1, cat)
  
  
  x[5:7] <- prop.table(abs(x[5:7]))
  
  #calculate  quantile ("quant") for each bin
  
  table_a = data.frame(a_table%>% group_by(cat) %>%
                         mutate(quant = quantile(c1, prob = x[5])))
  
  table_b = data.frame(b_table%>% group_by(cat) %>%
                         mutate(quant = quantile(c1, prob = x[6])))
  
  table_c = data.frame(c_table%>% group_by(cat) %>%
                         mutate(quant = quantile(c1, prob = x[7])))
  
  
  #create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
  table_a$diff = ifelse(table_a$quant > table_a$c1,1,0)
  table_b$diff = ifelse(table_b$quant > table_b$c1,1,0)
  table_c$diff = ifelse(table_c$quant > table_c$c1,1,0)
  
  #group all tables
  
  final_table = rbind(table_a, table_b, table_c)
  # calculate the total mean : this is what needs to be optimized
  mean = mean(final_table$diff) 
}

您可以应用和绘制此功能 -

Output <- optim_nm(fitness, k = 7, trace = TRUE)
plot(Output)

R：[0,1] 之外的 x 'probs'

R: x 'probs' outside [0,1]

评论

评论