## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)
library(BiocStyle)

## ----eval = FALSE, include = TRUE---------------------------------------------
#  # install BiocManager if not present
#  if (!requireNamespace("BiocManager", quietly = TRUE)) {
#      install.packages("BiocManager")
#  }
#  
#  # install MetMashR and dependencies
#  BiocManager::install("MetMashR")

## ----eval=TRUE, include=FALSE-------------------------------------------------
suppressPackageStartupMessages({
    # load the packages
    library(struct)
    library(MetMashR)
    library(metabolomicsWorkbenchR)
    library(ggplot2)
})

## ----eval=FALSE, include=TRUE-------------------------------------------------
#  # load the packages
#  library(struct)
#  library(MetMashR)
#  library(metabolomicsWorkbenchR)
#  library(ggplot2)

## ----example-get-data,eval=FALSE,include=TRUE---------------------------------
#  # get annotations
#  AN <- do_query(
#      context = "study",
#      input_item = "analysis_id",
#      input_value = "AN000465",
#      output_item = "metabolites"
#  )

## ----include=FALSE,eval=TRUE--------------------------------------------------
AN <- readRDS(
    system.file("extdata/AN000465_subset.rds", package = "MetMashR")
)

## -----------------------------------------------------------------------------
AT <- annotation_table(data = AN, id_column = NULL)

## -----------------------------------------------------------------------------
.mwb_source <- setClass(
    "mwb_source",
    contains = c("annotation_database"),
    prototype = list(
        name = "Import from Metabolomics Workbench",
        libraries = "metabolomicsWorkbenchR"
    )
)

## -----------------------------------------------------------------------------
mwb_source <- function(...) {
    # new object
    out <- new_struct(
        "mwb_source",
        ...
    )
    return(out)
}

## ----eval=TRUE,include=TRUE---------------------------------------------------
setMethod(
    f = "read_database",
    signature = c("mwb_source"),
    definition = function(obj) {
        ## get annotations using metabolomicsWorkbenchR
        # AN = do_query(
        #    context = "study",
        #    input_item = "analysis_id",
        #    input_value = M$analysis_id,
        #    output_item = "metabolites")

        ## for vignette use locally cached subset
        AN <- readRDS(
            system.file("extdata/AN000465_subset.rds", package = "MetMashR")
        )

        return(AN)
    }
)

## -----------------------------------------------------------------------------
# initialise source
SRC <- mwb_source(
    source = "AN000465"
)

# import
AT <- read_source(SRC)

## ----new-empty-removal-obj----------------------------------------------------
set_struct_obj(
    class_name = "drop_empty_columns",
    struct_obj = "model",
    params = character(0),
    outputs = c(updated = "annotation_source"),
    private = character(0),
    prototype = list(
        name = "Drop empty columns",
        description = paste0(
            "A workflow step that removes columns from an annotation table ",
            "where all rows are NA."
        ),
        predicted = "updated"
    )
)

## -----------------------------------------------------------------------------
M <- drop_empty_columns()
show(M)

## -----------------------------------------------------------------------------
set_obj_method(
    class_name = "drop_empty_columns",
    method_name = "model_apply",
    signature = c("drop_empty_columns", "annotation_source"),
    definition = function(M, D) {
        # search for columns of NA
        W <- lapply( # for each column
            D$data, # in the annotation table
            function(x) {
                all(is.na(x)) # return TRUE if all rows are NA
            }
        )

        # get index of columns with all rows NA
        idx <- which(unlist(W))

        # if any found, remove from annotation table
        if (length(idx) > 0) {
            D$data[, idx] <- NULL
        }

        # update model object
        M$updated <- D

        # return object
        return(M)
    }
)

## -----------------------------------------------------------------------------
M <- model_apply(M, AT)

## -----------------------------------------------------------------------------
ncol(AT$data)

## -----------------------------------------------------------------------------
ncol(M$updated$data)

## -----------------------------------------------------------------------------
# define new model object
set_struct_obj(
    class_name = "remove_suffix",
    struct_obj = "model",
    params = c(clean = "logical", column_name = "character"),
    outputs = c(updated = "annotation_source"),
    prototype = list(
        name = "Remove suffix",
        description = paste0(
            "A workflow step that removes suffixes from molecule names by ",
            "splitting a string at the last underscore an retaining the part",
            "of the string before the underscore."
        ),
        predicted = "updated",
        clean = FALSE,
        column_name = "V1"
    )
)

# define method for new object
set_obj_method(
    class_name = "remove_suffix",
    method_name = "model_apply",
    signature = c("remove_suffix", "annotation_source"),
    definition = function(M, D) {
        # get list of molecule names
        x <- D$data[[M$column_name]]

        # split string at last underscore
        s <- strsplit(x, "_(?!.*_)", perl = TRUE)

        # get left hand side
        s <- lapply(s, "[", 1)

        # if clean replace existing column, otherwise new column
        if (M$clean) {
            D$data[[M$column_name]] <- unlist(s)
        } else {
            D$data$name.fixed <- unlist(x)
        }

        # update model object
        M$updated <- D

        # return object
        return(M)
    }
)

## ----eval=FALSE,include = TRUE------------------------------------------------
#  # refmet
#  refmet <- mwb_refmet_database()
#  
#  # pubchem caches
#  pubchem_cid_cache <- rds_database(
#      source = system.file("cached/pubchem_cid_cache.rds",
#          package = "MetMashR"
#      )
#  )
#  pubchem_smile_cache <- rds_database(
#      source = system.file("cached/pubchem_smiles_cache.rds",
#          package = "MetMashR"
#      )
#  )

## ----eval=TRUE,include=FALSE--------------------------------------------------
refmet <- mwb_refmet_database()

pubchem_cid_cache <- rds_database(
    source = file.path(
        system.file("cached", package = "MetMashR"),
        "pubchem_cid_cache.rds"
    )
)
pubchem_smile_cache <- rds_database(
    source = file.path(
        system.file("cached", package = "MetMashR"),
        "pubchem_smiles_cache.rds"
    )
)

## ----message=FALSE, include=TRUE, eval=TRUE-----------------------------------
# prepare sequence
M <- import_source() +
    drop_empty_columns() +
    remove_suffix(
        clean = TRUE,
        column_name = "metabolite_name"
    ) +
    database_lookup(
        query_column = "refmet_name",
        database_column = "name",
        database = refmet,
        suffix = "_mwb",
        include = "pubchem_cid"
    ) +
    pubchem_compound_lookup(
        query_column = "metabolite_name",
        search_by = "name",
        suffix = "_pc",
        output = "cids",
        records = "best",
        delay = 0.2,
        cache = pubchem_cid_cache
    ) +
    prioritise_columns(
        column_names = c("pubchem_cid_mwb", "CID_pc"),
        output_name = "pubchem_cid",
        source_name = "pubchem_cid_source",
        source_tags = c("mwb", "pc"),
        clean = TRUE
    ) +
    pubchem_property_lookup(
        query_column = "pubchem_cid",
        search_by = "cid",
        suffix = "",
        property = "CanonicalSMILES",
        delay = 0.2,
        cache = pubchem_smile_cache
    )

# apply sequence
M <- model_apply(M, mwb_source(source = "AN000465"))

## -----------------------------------------------------------------------------
# prepare chart
C <- openbabel_structure(
    smiles_column = "CanonicalSMILES",
    row_index = 1,
    scale_to_fit = FALSE,
    view_port = 400,
    image_size = 500
)

# loop over some records and plot some of the molecules
G <- list()
x <- 1
for (k in c(3, 5)) {
    C$row_index <- k
    G[[x]] <- chart_plot(C, predicted(M))
    x <- x + 1
}
cowplot::plot_grid(plotlist = G, nrow = 1, labels = "AUTO")

## -----------------------------------------------------------------------------
sessionInfo()