如何在 r 中有效地编码 MATCH INDEX(相当于 excel)?

How to effectively code MATCH INDEX in r (equivalent to excel)?

提问人:TFT 提问时间:9/11/2023 最后编辑:TFT 更新时间:9/11/2023 访问量:54

问:

我有两个不同的 excel 电子表格(输入和输出文件)。我想使用 r 脚本在输入文件中查找矩阵中的值,并将值存储在输出文件中的正确单元格中,而不是直接在 excel 中使用匹配索引函数。

我的输入文件由矩阵(列和行)和输出文件组成。但是,输出文件中的矩阵是“转置”的,列和行的名称可以按不同的顺序排列。因此,我需要使用二维查找来查找输入文件中的值并将它们存储在输出文件中。

假设这是我的输入文件(用于说明的虚构数字):enter image description here

假设这是我的输出文件:enter image description here

如何在 r 中实现查找,以便使用 r 脚本将输入文件中的值正确输入到输出文件中的相应单元格中?我已将两个 excel 文件存储为数据帧。

您的帮助非常有价值。谢谢!


#clearing workspace
rm(list=ls())

# Load required libraries
library(openxlsx)

# get username 
username <- Sys.getenv("USER")

# Load input and output Excel files
input_file <- paste0("/Users/", username, "/Downloads/input_file.xlsx", collapse = "")
output_file <- paste0("/Users/", username, "/Desktop/output_file.xlsx", collapse = "")

# Load the input and output matrices
input_matrix <- read_xlsx(input_file, sheet = "KLICKHERE")
output_matrix <- read_xlsx(output_file, sheet = "ENTERHERE")
class(input_matrix)

# Transpose the dataframe
transposed_input_matrix <- t(input_matrix)

# Convert the column names to Date objects
colnames(output_matrix) <- as.Date(colnames(output_matrix), format = "%YYYY/%mm/%dd")

# Function to perform the two-dimensional lookup
lookup_and_update <- function(transposed_input_matrix, output_matrix) {
  for (i in 1:nrow(output_matrix)) {
    for (j in 1:ncol(output_matrix)) {
      # Get the row and column names in the output matrix
      row_name <- rownames(output_matrix)[i]
      col_name <- colnames(output_matrix)[j]
      
      # Find the corresponding value in the input matrix
      value <- transposed_input_matrix[row_name, col_name]
      
      # Update the value in the output matrix
      output_matrix[i, j] <- value
    }
  }
  return(output_matrix)
}

# Call the lookup function
updated_output_matrix <- lookup_and_update(transposed_input_matrix, output_matrix)

# Save the updated output matrix back to the output Excel file
write.xlsx(updated_output_matrix, output_file, sheetName = "ENTERHERE")

> dput(input_matrix)
structure(list(quarter = structure(c(1640995200, 1648771200, 
1656633600, 1664582400, 1672531200, 1680307200, 1688169600, 1696118400, 
1704067200, 1711929600, 1719792000, 1727740800, 1735689600, 1743465600, 
1751328000, 1759276800, 1767225600, 1775001600, 1782864000, 1790812800, 
1798761600, 1806537600, 1814400000, 1822348800), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), portugal = c(3.2, 1.2617029893181, 
2.60440314593473, 0.205747170894448, 2.99742239259666, 0.454981287908458, 
0.812500920203167, 3.53979030628357, 2.203045423758, 0.054471200265702, 
2.92803826928382, 0.718964340034683, 1.60951470750129, 5.07871970749977, 
5.69403126006479, 1.22925310502368, 3.66396581660635, 2.37878419177338, 
2.29467033332622, 5.03595630837856, 2.25374064291613, 1.69444882698869, 
4.16205429572283, 4.50132478373478), Switzerland = c(4, 2.38038947850657, 
5.47668679859636, 5.91361388434538, 4.77394394868853, 0.51390066344242, 
5.01921886848812, 2.50248783131655, 4.01832050488102, 5.41622706832583, 
5.30149956216031, 3.16778787833323, 2.199973116468, 5.01366343788224, 
4.29923192879718, 4.74615956273584, 1.28422990972834, 0.284477581237545, 
2.08538425170424, 0.463401565316672, 5.19591972413863, 1.48139690105528, 
3.72116283773825, 2.88215533537597), UK1 = c(3, 5.86873632407074, 
5.00564172969994, 4.53205722786764, 2.21527468771027, 4.01342647825025, 
5.38033314419433, 3.94260225784184, 3.32679878460482, 4.44258374317064, 
0.912140741259649, 3.31029041858673, 3.54577260155724, 5.47399328355281, 
2.87960737852272, 0.333399757849791, 1.68600300552304, 0.761656675816694, 
5.60117991518305, 2.41681043343095, 1.47930439097793, 1.96253624751877, 
2.04852072952451, 3.00458221738878), UK2 = c(-1, 3.35979319893751, 
3.41085866605616, 0.560088392935827, 5.13880709708747, 4.12321867925324, 
0.678575131657537, 5.05445686032681, 1.91810878862458, 1.3819304062605, 
0.80241487254838, 5.88840619656107, 1.4643177661779, 1.30971606465739, 
5.27065656469845, 2.59430512488464, 2.43626303990699, 2.6781401256743, 
2.92798363758913, 3.82250194049481, 3.53273150832144, 2.88313585242345, 
2.2629948322944, 1.45945340574197)), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -24L))
> 
> dput(output_matrix)
structure(list(c("Portugal", "Switzerland", "UK"), c(NA, NA, 
NA), c(NA, NA, NA), c(NA, NA, NA), c(NA, NA, NA), c(NA, NA, NA
), c(NA, NA, NA), c(NA, NA, NA), c(NA, NA, NA)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -3L), .Names = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_))
> 
r Excel 索引匹配 查找表

评论

0赞 Mark 9/11/2023
嗨,TFT,您是否将 excel 文件导入为数据帧?
0赞 Mark 9/11/2023
另外,什么是转置您的数据?如果代码是 R,请发布代码。将事物保持在同一状态比来回转置数据要多得多
0赞 TFT 9/11/2023
嗨,马克。感谢您的快速反馈。是的,我已经将我的 excel 文件导入为数据帧,并且我已经转置了我的输入文件。我刚刚在我的初始请求中发布了我的 r 代码。提前感谢您的帮助!
0赞 Mark 9/11/2023
伟大!谢谢你这样做!:-)现在,您可以做的另一件事会很棒 - 运行 ,并将结果添加到您的问题中dput(input_matrix)
0赞 TFT 9/11/2023
刚刚完成!:)感谢您的帮助和耐心!

答:

0赞 Near Lin 9/11/2023 #1

也许你可以把你的input_matrix换成output_matrix?

喜欢:

output_df <- data.frame(t(input_matrix))[-1, ]

经过一些调整:

colnames(output_df) <- input_matrix$quarter
output_df$time <- names(input_matrix)[-1]
output_df <- output_df[, c(ncol(output_df), 1:(ncol(output_df) - 1))]

PS:关于如何组合UK1和UK2的任何说明?

评论

1赞 Mark 9/11/2023
嗨,林附近!查看我与OP的聊天记录!:-)
1赞 Mark 9/11/2023 #2

步骤:

  1. 将季度变成日期(这是一个日期时间)
  2. 使数据变长,将所有国家/地区列转换为自己的行,并将国家/地区名称放入名为“国家/地区”的列中
  3. 清理“国家/地区”列 - 如果它包含“英国”,则将其设为“英国”,否则,将其设为标题大小写
  4. 再次加宽,使用季度日期作为列名,使用值作为值。因为我们现在有多个 UK 行的值,所以我们使用匿名函数将它们转换为字符串。 表示它将多个值连接起来,并在值中间用逗号连接。collapse = ","
  5. 将其写出到 csv 文件中

法典:

pacman::p_load(tidyverse)

input_matrix |>
  mutate(quarter = as.Date(quarter)) |>
  pivot_longer(-quarter, names_to = "country", values_to = "value") |>
  mutate(country = ifelse(str_detect(country, "UK"), "UK", str_to_title(country))) |>
  pivot_wider(names_from = "quarter", values_from = "value", values_fn = ~paste0(.x, collapse = ",")) |>
  write_csv("output.csv")

输出:

country,2022-01-01,2022-04-01,2022-07-01,2022-10-01,2023-01-01,2023-04-01,2023-07-01,2023-10-01,2024-01-01,2024-04-01,2024-07-01,2024-10-01,2025-01-01,2025-04-01,2025-07-01,2025-10-01,2026-01-01,2026-04-01,2026-07-01,2026-10-01,2027-01-01,2027-04-01,2027-07-01,2027-10-01
Portugal,3.2,1.2617029893181,2.60440314593473,0.205747170894448,2.99742239259666,0.454981287908458,0.812500920203167,3.53979030628357,2.203045423758,0.054471200265702,2.92803826928382,0.718964340034683,1.60951470750129,5.07871970749977,5.69403126006479,1.22925310502368,3.66396581660635,2.37878419177338,2.29467033332622,5.03595630837856,2.25374064291613,1.69444882698869,4.16205429572283,4.50132478373478
Switzerland,4,2.38038947850657,5.47668679859636,5.91361388434538,4.77394394868853,0.51390066344242,5.01921886848812,2.50248783131655,4.01832050488102,5.41622706832583,5.30149956216031,3.16778787833323,2.199973116468,5.01366343788224,4.29923192879718,4.74615956273584,1.28422990972834,0.284477581237545,2.08538425170424,0.463401565316672,5.19591972413863,1.48139690105528,3.72116283773825,2.88215533537597
UK,"3,-1","5.86873632407074,3.35979319893751","5.00564172969994,3.41085866605616","4.53205722786764,0.560088392935827","2.21527468771027,5.13880709708747","4.01342647825025,4.12321867925324","5.38033314419433,0.678575131657537","3.94260225784184,5.05445686032681","3.32679878460482,1.91810878862458","4.44258374317064,1.3819304062605","0.912140741259649,0.80241487254838","3.31029041858673,5.88840619656107","3.54577260155724,1.4643177661779","5.47399328355281,1.30971606465739","2.87960737852272,5.27065656469845","0.333399757849791,2.59430512488464","1.68600300552304,2.43626303990699","0.761656675816694,2.6781401256743","5.60117991518305,2.92798363758913","2.41681043343095,3.82250194049481","1.47930439097793,3.53273150832144","1.96253624751877,2.88313585242345","2.04852072952451,2.2629948322944","3.00458221738878,1.45945340574197"