Skip to contents
knitr::opts_chunk$set(echo = TRUE)
install.packages("devtools")
devtools::install_github("omicsEye/massSight")

read data

We have consolidated the data from the two sources and created a single excel file for each method (HILIC-pos, HILIC-neg, C18-neg, C8-pos), which can be downloaded from the following link: https://gwu.box.com/s/i8fxlre70b0sbrdhp9sjjfautlmlwd6x

The PRISM data can be downloaded from the following link: https://gwu.box.com/s/033vt0ieh3gucaw5f9as3fuvna7h1b4t

Load iHMP data

loaded_data <-
  massSight::load_data(
    input = "path/to/your/iHMP_data.xlsx",
    type = "all",
    sheet = 1,
    id = "Compound_ID"
  )
loaded_data$feature_metadata$MZ <-
  as.numeric(loaded_data$feature_metadata$MZ)
loaded_data$feature_metadata$RT <-
  as.numeric(loaded_data$feature_metadata$RT)
feature_metadata2 <-
  loaded_data$feature_metadata[colnames(loaded_data$data), ]
feature_metadata2$Intensity <- colMeans(loaded_data$data, na.rm = T)
feature_metadata <- cbind(feature_metadata2, t(loaded_data$data))
ref_input <-
  feature_metadata2[!is.na(feature_metadata2$MZ) &
    !is.na(feature_metadata2$RT), ]

load PRISM clustered data as main output

PRISM <-
  massSight::load_data(
    input = "path/to/your/PRISM_data.xlsx",
    type = "all",
    sheet = 1,
    id = "Compound_ID"
  )
PRISM$feature_metadata$MZ <-
  as.numeric(PRISM$feature_metadata$MZ)
PRISM$feature_metadata$RT <-
  as.numeric(PRISM$feature_metadata$RT)
feature_metadata2 <-
  PRISM$feature_metadata[colnames(PRISM$data), ]
feature_metadata2$Intensity <- colMeans(PRISM$data, na.rm = T)
feature_metadata <- cbind(feature_metadata2, t(PRISM$data))
PRISM_input <-
  feature_metadata2[!is.na(feature_metadata2$MZ) &
    !is.na(feature_metadata2$RT), ]
PRISM_Methods <- sapply(str_split(PRISM_input$Compound_ID, "_"), "[[", 1)
PRISM_input$Method <- PRISM_Methods

Approach 1: iHMP vs. PRISM method by method

Create an object for iHMP data as reference for alignment and combining

aligned_df <- vector(mode = "list", length = 4)
profiling_methods <- c("HILIC-neg", "HILIC-pos", "C18-neg", "C8-pos")
names(aligned_df) <- profiling_methods
ref <- query <- final_smooth <- aligned_df
for (profiling_method in profiling_methods) {
  ref[[profiling_method]] <-
    create_ms_obj(
      df = ref_input[ref_input$Method == profiling_method, ],
      name = "iHMP",
      id_name = "Compound_ID",
      rt_name = "RT",
      mz_name = "MZ",
      int_name = "Intensity"
    )

  query[[profiling_method]] <-
    create_ms_obj(
      df = PRISM_input[PRISM_input$Method == profiling_method, ],
      name = "PRISM",
      id_name = "Compound_ID",
      rt_name = "RT",
      mz_name = "MZ",
      int_name = "Intensity"
    )

  # create and save distribution of MZ and RT of features
  ms1_distr <- massSight::distribution_plot(query[[profiling_method]])
  ms2_distr <- massSight::distribution_plot(ref[[profiling_method]])
  print(ms1_distr)
  print(ms2_distr)
}

These plots can be saved locally as well.

  ggsave(
    filename = paste0("analysis/iHMP_PRISM/PRISM_", profiling_method, "_query_distr_plot.png"),
    plot = ms1_distr,
    width = 5,
    height = 5,
    units = "in",
    dpi = 300,
    create.dir = TRUE
  )
  ggsave(
    filename = paste0("analysis/iHMP_PRISM/iHMP_", profiling_method, "_ref_distr_plot.png"),
    plot = ms2_distr,
    width = 5,
    height = 5,
    units = "in",
    dpi = 300,
    create.dir = TRUE
  )
}
for (profiling_method in profiling_methods) {
  print(profiling_method)
  aligned_df[[profiling_method]] <-
    auto_combine(ref[[profiling_method]], query[[profiling_method]], smooth_method = "gam", log = NULL)
}

View and write

for (profiling_method in profiling_methods) {
  write.table(aligned_df[[profiling_method]]@all_matched, file = paste0("analysis/iHMP_PRISM/iHMP_PRISM_", profiling_method, "_excel.tsv"), quote = FALSE, sep = "\t", row.names = FALSE)
}

Visualization

for (profiling_method in profiling_methods) {
  final_smooth[[profiling_method]] <- final_plots(aligned_df[[profiling_method]])
  print(final_smooth[[profiling_method]])
  ggsave(
    filename = paste0("analysis/iHMP_PRISM/iHMP_vs_all_PRISM_excel_", profiling_method, "_massSight_plots.png"),
    plot = final_smooth[[profiling_method]],
    width = 5,
    height = 5,
    units = "in",
    dpi = 300,
    create.dir = TRUE
  )
}
for (profiling_method in profiling_methods) {
  write.table(aligned_df[[profiling_method]]@all_matched,
    file = paste0("analysis/iHMP_PRISM/iHMP_PRISM_", profiling_method, "_excel.tsv"),
    quote = FALSE, sep = "\t", row.names = FALSE
  )
}