MAFTOOLS:将扩增子测序数据中的变体添加到WES数据生成的MAF文件中

MAFTOOLS: Adding variants from amplicon sequenced data to MAF file generated by WES data

提问人:AnandamidaCBD 提问时间:7/18/2023 最后编辑:AnandamidaCBD 更新时间:7/27/2023 访问量:39

问:

我对生物信息学比较陌生,需要一些帮助:我想把某个特定基因的变异添加到 MAF(突变注释格式)文件中,这些变异来自扩增子测序数据(因为该热点区域在 WES 中测序质量不佳)。目的是正确地生成 oncoplot(肿瘤突变瀑布图)。我现有的 MAF 文件完全由 WES(全外显子组测序)数据生成。 以下是我的表格的列,我想把它们添加到我的 MAF 中:

> head(fgfr3_status)
# A tibble: 6 × 19
  Sample  chrom     Pos Ref   Alt   var_type consequence                Impact `cDNA pos` `CDS pos` `protein pos` `AA pos` `codon change` SIFT  `Known var` Tum_Ref Tum_Alt Tum_VAF `Seq type`
  <chr>   <dbl>   <dbl> <chr> <chr> <chr>    <chr>                      <chr>  <chr>      <chr>     <chr>         <chr>    <chr>          <chr> <chr>         <dbl>   <dbl>   <dbl> <chr>     

但是,我不确定此表中的数据是否包含满足填写 MAF 文件要求所需的所有信息。

这是我到目前为止尝试过的代码:

setwd("path/to/folder/")
file <- list.files(pattern = ".consensus.3.maf")
library(maftools)
library(dplyr)
library(readxl)
MAF <- read.maf(maf = file)

setwd("path/to/folder")
fgfr3_status <- read_excel("FGFR3_mutation_status.xlsx")

MAF_data <- MAF@data %>%
  add_count(Hugo_Symbol, Transcript_ID, Tumor_Sample_Barcode, Chromosome, Start_Position, Variant_Classification)


new_variants_df <- data.frame(
  Hugo_Symbol = "FGFR3",
  Entrez_Gene_Id = NA,  # You might need to add NA values for columns not present in the metadata
  Center = NA,
  NCBI_Build = NA,
  Chromosome = fgfr3_status$chrom,
  Start_Position = fgfr3_status$Pos,
  End_Position = NA,    # You might need to add NA values for columns not present in the metadata
  Strand = NA,
  Variant_Classification = fgfr3_status$consequence,
  Variant_Type = fgfr3_status$var_type,
  Reference_Allele = fgfr3_status$Ref,
  Tumor_Seq_Allele1 = fgfr3_status$Tum_Ref,
  Tumor_Seq_Allele2 = fgfr3_status$Tum_Alt,
  dbSNP_RS = fgfr3_status$`Known var`,
  dbSNP_Val_Status = NA,
  Tumor_Sample_Barcode = fgfr3_status$Sample,
  Matched_Norm_Sample_Barcode = NA,
  Match_Norm_Seq_Allele1 = fgfr3_status$Alt,
  Match_Norm_Seq_Allele2 = NA,
  Tumor_Validation_Allele1 = NA,
  Tumor_Validation_Allele2 = NA,
  Match_Norm_Validation_Allele1 = NA,
  Match_Norm_Validation_Allele2 = NA,
  Verification_Status = NA,
  Validation_Status = NA,
  Mutation_Status = NA,
  Sequencing_Phase = NA,
  Sequence_Source = fgfr3_status$`Seq type`,
  Validation_Method = NA,
  Score = NA,
  BAM_File = NA,
  Sequencer = NA,
  Tumor_Sample_UUID = NA,
  Matched_Norm_Sample_UUID = NA,
  HGVSc = NA,
  HGVSp = NA,
  HGVSp_Short = NA,
  Transcript_ID = NA,
  Exon_Number = NA,
  t_depth = NA,
  t_ref_count = NA,
  t_alt_count = NA,
  n_depth = NA,
  n_ref_count = NA,
  n_alt_count = NA,
  all_effects = NA,
  Allele= NA,
  Gene= NA,
  Feature=NA,
  Feature_type= NA,
  Consequence= fgfr3_status$consequence,
  cDNA_postion=fgfr3_status$`cDNA pos`,
  CDS_position=fgfr3_status$`CDS pos`,
  Protein_position=fgfr3_status$`protein pos`,
  Amino_acids=fgfr3_status$`AA pos`,
  Codons=fgfr3_status$`codon change`,
  Existing_variation= fgfr3_status$`Known var`,
  SIFT= fgfr3_status$SIFT,
  IMPACT=fgfr3_status$Impact,
  Tum_VAF=fgfr3_status$Tum_VAF)



merged_MAF <- rbind(MAF@data, new_variants_df, fill=TRUE)
setwd("path/to/folder")
save(merged_MAF, file = "FGFR3_Rick.rda")

MERGED_MAF <- MAF(nonSyn= merged_MAF)
write_main_maf(MERGED_MAF, "FGFR3_RICK")

FILE <- list.files(pattern = "FGFR3_RICK.maf")
new_MAF <- read.maf(FILE)
new_MAF@data

pdf("path/to/folder/FGFR3_oncoplot.pdf", width = 20, height = 20)
oncoplot(maf= Z , top = 200, fontSize = 0.3)
dev.off()

我遇到的问题是,运行此代码后,MAF@data 和 new_MAF@data 中的变体总数完全相同。但是,我期望新变体已被添加到 MAF 对象中。有人可以帮我确定可能出了什么问题吗?oncoplot 在突变最多的基因中也没有显示 FGFR3。

提前感谢您的帮助!

r 合并 rbind vcf-variant-call-format

评论


答: 暂无答案