## ---- echo=FALSE--------------------------------------------------------------
source(system.file('vignettes_inc.R', package='biodb'))

## -----------------------------------------------------------------------------
biodb::genNewExtPkg(path='biodbChebiEx', dbName='chebi.ex', connType='compound',
                    dbTitle='ChEBI connector example', entryType='xml', remote=TRUE)

## -----------------------------------------------------------------------------
list.files('biodbChebiEx', all.files=TRUE, recursive=TRUE)

## ---- eval=FALSE, highlight=FALSE, code=readLines('biodbChebiEx/inst/definitions.yml')----
#  # biodb example definitions file for extensions packages, version 1.0.0
#  
#  databases:
#    chebi.ex:
#      name: ChEBI connector example
#      description: Write here the description of this database.
#      compound.db: true
#      entry.content.type: xml
#      parsing.expr:
#        accession: substring-after(//dbns:return/dbns:accessionId,'ACCESSION:')
#        name:
#        - //dbns:name
#        - //dbns:synonyms/dbns:data
#        mass: //dbns:mass
#        monoisotopic.mass: //dbns:monoisotopicMass
#        smiles: //dbns:return/dbns:smiles
#        inchi: //dbns:return/dbns:inchi
#        inchikey: //dbns:return/dbns:inchiKey
#        formula:
#        - //dbns:Formulae/dbns:source[text()='MyDatabase']/../dbns:data
#        - (//dbns:Formulae/dbns:data)[1]
#      xml.ns:
#        dbns: https://my.database.org/webservices/v1
#        xsd: http://www.w3.org/2001/XMLSchema
#      searchable.fields:
#        - name
#        - monoisotopic.mass
#        - molecular.mass
#        - average.mass
#        - nominal.mass
#      remote: true
#      # Length in seconds of the connection sliding window
#      scheduler.t: 1
#      # Number of connections allowed inside the connection sliding window
#      scheduler.n: 3
#      urls:
#        # Base URL of the database server, where to find entry pages
#        base.url: https://my.database.org/mydb/
#        # Webservice URL to use to contact web services
#        ws.url: https://my.database.org/webservices/mydb/3.2/
#        # Add any other URL you need for the development of your connector
#        # Inside your code, you can get each of these URLs with a call like the following one:
#        #     .self$getPropValSlot('urls', 'ws.url')
#  
#  fields:
#    chebi.ex.id:
#      description: ChEBI connector example ID
#      case.insensitive: true
#      forbids.duplicates: true
#      type: id
#      card: many

## -----------------------------------------------------------------------------
defFile <- system.file("extdata", "chebi_ex.yml", package='biodb')

## ---- eval=FALSE, highlight=FALSE, code=readLines(system.file("extdata", "chebi_ex.yml", package='biodb'))----
#  databases:
#  
#    chebi.ex:
#      name: ChEBI example connector
#      description: An example connector for ChEBI.
#      compound.db: true
#      entry.content.encoding: UTF-8
#      entry.content.type: xml
#      parsing.expr:
#        accession: substring-after(//chebi:return/chebi:chebiId,'CHEBI:')
#        formula:
#          - //chebi:Formulae/chebi:source[text()='ChEBI']/../chebi:data
#          - (//chebi:Formulae/chebi:data)[1]
#        inchi: //chebi:return/chebi:inchi
#        inchikey: //chebi:return/chebi:inchiKey
#        mass: //chebi:mass
#        monoisotopic.mass: //chebi:monoisotopicMass
#        name:
#          - //chebi:chebiAsciiName
#        smiles: //chebi:return/chebi:smiles
#      searchable.fields:
#        - name
#        - monoisotopic.mass
#        - molecular.mass
#      remote: true
#      scheduler.t: 1
#      scheduler.n: 3
#      urls:
#        base.url: https://www.ebi.ac.uk/chebi/
#        ws.url: https://www.ebi.ac.uk/webservices/chebi/2.0/
#      xml.ns:
#        chebi: https://www.ebi.ac.uk/webservices/chebi
#        xsd: http://www.w3.org/2001/XMLSchema
#  
#  fields:
#  
#    chebi.ex.id:
#      description: ChEBI ID
#      type: id
#      card: many
#      forbids.duplicates: true
#      case.insensitive: true

## ---- eval=FALSE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExEntry.R')----
#  #' ChEBI connector example entry class.
#  #'
#  #' Entry class for ChEBI connector example.
#  #'
#  #' @seealso
#  #' \code{\link{BiodbXmlEntry}}.
#  #'
#  #' @examples
#  #' # Create an instance with default settings:
#  #' mybiodb <- biodb::Biodb()
#  #'
#  #' # Get a connector that inherits from ChebiExConn:
#  #' conn <- mybiodb$getFactory()$createConn('chebi.ex')
#  #'
#  #' # Get the first entry
#  #' e <- conn$getEntry(conn$getEntryIds(1L))
#  #'
#  #' # Terminate instance.
#  #' mybiodb$terminate()
#  #'
#  #' @import biodb
#  #' @import R6
#  #' @export
#  ChebiExEntry <- R6::R6Class("ChebiExEntry",
#      inherit=
#          biodb::BiodbXmlEntry
#      ,
#  
#  public=list(
#  
#  initialize=function(...) {
#      super$initialize(...)
#  }
#  
#  ,doCheckContent=function(content) {
#  
#      # You can do some more checks of the content here.
#  
#      return(TRUE)
#  }
#  
#  ,doParseFieldsStep2=function(parsed.content) {
#  
#      # TODO Implement your custom parsing processing here.
#  }
#  ))

## ---- echo=FALSE, results='asis'----------------------------------------------
make_vignette_ref('details')

## ---- eval=FALSE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExConn.R')----
#  #' ChEBI connector example connector class.
#  #'
#  #' Connector class for ChEBI connector example.
#  #'
#  #' @seealso \code{\link{BiodbConn}}.
#  #'
#  #' @examples
#  #' # Create an instance with default settings:
#  #' mybiodb <- biodb::Biodb()
#  #'
#  #' # Get a connector:
#  #' conn <- mybiodb$getFactory()$createConn('chebi.ex')
#  #'
#  #' # Get the first entry
#  #' e <- conn$getEntry(conn$getEntryIds(1L))
#  #'
#  #' # Terminate instance.
#  #' mybiodb$terminate()
#  #'
#  #' @import biodb
#  #' @import R6
#  #' @export
#  ChebiExConn <- R6::R6Class("ChebiExConn",
#  inherit=biodb::BiodbConn,
#  
#  public=list(
#  
#  initialize=function(...) {
#      super$initialize(...)
#  }
#  
#  ,wsFind=function(name="", retfmt=c('plain', 'parsed', 'ids', 'request')) {
#      # This is the implementation of a fictive web service called "find" that
#      # search for entries by name.
#      # Use it as an example for implementing your own web services.
#  
#      retfmt <- match.arg(retfmt)
#  
#      # Build request
#      params <- list(name=name)
#      url <- BiodbUrl(url=c(.self$getPropValSlot('urls', 'ws.url'), 'find'),
#                      params=params)
#      request <- .self$makeRequest(method='get', url=url)
#  
#      # Return request
#      if (retfmt == 'request')
#          return(request)
#  
#      # Send request
#      # This the line that should be run for sending the request and getting the
#      # results:
#      #results <- .self$getBiodb()$getRequestScheduler()$sendRequest(request)
#      # Instead, for this example, we just generate the results of this fictive
#      # web service:
#      results <- paste('{"0001": {"name": "name1"},',
#                       ' "0198": {"name": "name2"},',
#                       ' "9834": {"name": "name3"}}')
#  
#      # Parse
#      if (retfmt != 'plain') {
#  
#          # Parse JSON
#          results <- jsonlite::fromJSON(results, simplifyDataFrame=FALSE)
#  
#          # Get IDs
#          if (retfmt == 'ids')
#              results <- names(results)
#      }
#  
#      return(results)
#  }
#  ),
#  
#  private=list(
#  
#  doGetNbEntries=function(count=FALSE) {
#  
#      # Replace the call below if you have a direct way (specific web service for
#      # a remote database, provided method or information for a local database)
#      # to count entries for your database.
#      return(callSuper(count=count))
#  }
#  
#  ,doGetEntryContentFromDb=function(id) {
#  
#      # Initialize return values
#      content <- rep(NA_character_, length(id))
#  
#      # TODO Implement retrieval of entry contents.
#  
#      # Some debug message
#      if (length(content) > 0)
#          .self$message('debug', paste("Content of first entry:", content[[1]]))
#  
#      return(content)
#  }
#  
#  ,doGetEntryIds=function(max.results=NA_integer_) {
#      # Overrides super class' method.
#  
#      ids <- NA_character_
#  
#      # TODO Implement retrieval of accession numbers.
#  
#      return(ids)
#  }
#  
#  ,doSearchForEntries=function(fields=NULL, max.results=NA_integer_) {
#      # Overrides super class' method.
#  
#      ids <- character()
#  
#      # TODO Implement search of entries by filtering on values of fields.
#  
#      return(ids)
#  }
#  
#  ,doGetEntryContentRequest=function(id, concatenate=TRUE) {
#  
#      # TODO Modify the code below to build the URLs to get the contents of the
#      # entries.
#      # Depending on the database, you may have to build one URL for each
#      # individual entry or may be able to write just one or a few URL for all
#      # entries to retrieve.
#      u <- c(.self$getPropValSlot('urls', 'base.url'), 'entries',
#             paste(id, 'xml', sep='.'))
#      url <- BiodbUrl(url=u)$toString()
#  
#      return(url)
#  }
#  
#  ,doGetEntryPageUrl=function(id) {
#  
#      # TODO Modify this code to build the individual URLs to the entry web pages
#      fct <- function(x) {
#          u <- c(.self$getPropValSlot('urls', 'base.url'), 'entries', x)
#          BiodbUrl(url=u)$toString()
#      }
#  
#      return(vapply(id, fct, FUN.VALUE=''))
#  }
#  
#  ,doGetEntryImageUrl=function(id) {
#  
#      # TODO Modify this code to build the individual URLs to the entry images
#      fct <- function(x) {
#          u <- c(.self$getPropValSlot('urls', 'base.url'), 'images', x,
#                 'image.png')
#          BiodbUrl(url=u)$toString()
#      }
#  
#      return(vapply(id, fct, FUN.VALUE=''))
#  }
#  ))

## ---- eval=FALSE--------------------------------------------------------------
#  wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL',
#                           max.results=10,
#                           retfmt=c('plain', 'parsed', 'request', 'ids')) {
#  }

## ---- echo=FALSE, results='hide'----------------------------------------------
connClass <- system.file("extdata", "ChebiExConn.R", package='biodb')
entryClass <- system.file("extdata", "ChebiExEntry.R", package='biodb')
source(connClass)
source(entryClass)

## ---- code=readLines(connClass)-----------------------------------------------
ChebiExConn <- R6::R6Class("ChebiExConn",
inherit=biodb::BiodbConn,

public=list(

initialize=function(...) {
    super$initialize(...)
},

wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL',
                         max.results=10,
                         retfmt=c('plain', 'parsed', 'request', 'ids')) {

    # Check parameters
    chk::chk_string(search)
    chk::chk_in(search.category, self$getSearchCategories())
    chk::chk_number(max.results)
    chk::chk_gte(max.results, 0)
    chk::chk_in(stars, self$getStarsCategories())
    retfmt <- match.arg(retfmt)

    # Build request
    params <- c(search=search,
                searchCategory=search.category,
                maximumResults=max.results,
                starsCategory=stars)
    url <- c(self$getPropValSlot('urls', 'ws.url'), 'test/getLiteEntity')
    request <- self$makeRequest(method='get', url=BiodbUrl$new(url=url,
                                                                params=params),
                                 encoding='UTF-8')
    if (retfmt == 'request')
        return(request)

    # Send request
    results <- self$getBiodb()$getRequestScheduler()$sendRequest(request)

    # Parse
    if (retfmt != 'plain') {

        # Parse XML
        results <-  XML::xmlInternalTreeParse(results, asText=TRUE)

        if (retfmt == 'ids') {
            ns <- self$getPropertyValue('xml.ns')
            results <- XML::xpathSApply(results, "//chebi:chebiId",
                                        XML::xmlValue, namespaces=ns)
            results <- sub('CHEBI:', '', results)
            if (length(grep("^[0-9]+$", results)) != length(results))
                self$error("Impossible to parse XML to get entry IDs.")
        }
    }

    return(results)
}
),

private=list(
doSearchForEntries=function(fields=NULL, max.results=0) {

    ids <- character()

    if ( ! is.null(fields)) {

        # Search by name
        if ('name' %in% names(fields))
            ids <- self$wsGetLiteEntity(search=fields$name,
                                         search.category="ALL NAMES",
                                         max.results=0, retfmt='ids')
    }

    # Cut
    if (max.results > 0 && max.results < length(ids))
        ids <- ids[seq_len(max.results)]

    return(ids)
},

doGetEntryContentRequest=function(id, concatenate=TRUE) {

    url <- c(self$getPropValSlot('urls', 'ws.url'), 'test',
             'getCompleteEntity')

    urls <- vapply(id, function(x) BiodbUrl$new(url=url,
                                            params=list(chebiId=x))$toString(),
                   FUN.VALUE='')

    return(urls)
},

doGetEntryIds=function(max.results=NA_integer_) {
    return(NULL)
},

doGetEntryPageUrl=function(id) {
    # Overrides super class' method

    url <- c(self$getPropValSlot('urls', 'base.url'), 'searchId.do')

    fct <- function(x) {
        BiodbUrl$new(url=url, params=list(chebiId=x))$toString()
    }
    
    urls <- vapply(id, fct, FUN.VALUE='')

    return(urls)
},

doGetEntryImageUrl=function(id) {
    # Overrides super class' method

    url <- c(self$getPropValSlot('urls', 'base.url'), 'displayImage.do')

    fct <- function(x) {
        BiodbUrl$new(url=url, params=list(defaultImage='true', imageIndex=0,
                                      chebiId=x, dimensions=400))$toString()
    }
    
    urls <- vapply(id, fct, FUN.VALUE='')

    return(urls)
}
))

## ---- code=readLines(entryClass)----------------------------------------------
ChebiExEntry <- R6::R6Class("ChebiExEntry",
inherit=BiodbXmlEntry,

public=list(

initialize=function(...) {
    super$initialize(...)
}
),

private=list(
doCheck=function(content) {
    
    # You can do some more checks of the content here.
    
    return(TRUE)
}

,doParseFieldsStep2=function(parsed.content) {
    
    # TODO Implement your custom parsing processing here.
}

))

## -----------------------------------------------------------------------------
mybiodb <- biodb::newInst()

## -----------------------------------------------------------------------------
mybiodb$loadDefinitions(defFile)

## -----------------------------------------------------------------------------
conn <- mybiodb$getFactory()$createConn('chebi.ex')

## -----------------------------------------------------------------------------
entry <- conn$getEntry('17001')
entry$getFieldsAsDataframe()

## ----Closing of the biodb instance--------------------------------------------
mybiodb$terminate()

## -----------------------------------------------------------------------------
MyEntryClass <- R6::R6Class("MyEntryClass", inherit=biodb::BiodbCsvEntry,
    public=list(
        initialize=function() {
            super$initialize(sep=';', na.strings=c('', 'NA'))
        }
))

## ---- eval=FALSE--------------------------------------------------------------
#  doParseContent=function(content) {
#  
#      # Get lines of content
#      lines <- strsplit(content, "\r?\n")[[1]]
#  
#      return(lines)
#  },
#  
#  doParseFieldsStep1=function(parsed.content) {
#  
#      # Get parsing expressions
#      parsing.expr <- .self$getParent()$getPropertyValue('parsing.expr')
#  
#      .self$.assertNotNull(parsed.content)
#      .self$.assertNotNa(parsed.content)
#      .self$.assertNotNull(parsing.expr)
#      .self$.assertNotNa(parsing.expr)
#      .self$.assertNotNull(names(parsing.expr))
#  
#      # Loop on all parsing expressions
#      for (field in names(parsing.expr)) {
#  
#          # Match whole content
#          g <- stringr::str_match(parsed.content, parsing.expr[[field]])
#  
#          # Get positive results
#          results <- g[ ! is.na(g[, 1]), , drop=FALSE]
#  
#          # Any match ?
#          if (nrow(results) > 0)
#              .self$setFieldValue(field, results[, 2])
#      }
#  }

## ---- eval=FALSE--------------------------------------------------------------
#  doParseFieldsStep2=function(parsed.content) {
#  
#      # Remove fields with empty string
#      for (f in .self$getFieldNames()) {
#          v <- .self$getFieldValue(f)
#          if (is.character(v) && ! is.na(v) && v == '')
#              .self$removeField(f)
#      }
#  
#      # Correct InChIKey
#      if (.self$hasField('INCHIKEY')) {
#          v <- sub('^InChIKey=', '', .self$getFieldValue('INCHIKEY'), perl=TRUE)
#          .self$setFieldValue('INCHIKEY', v)
#      }
#  
#      # Synonyms
#      synonyms <- XML::xpathSApply(parsed.content, "//synonym", XML::xmlValue)
#      if (length(synonyms) > 0)
#          .self$appendFieldValue('name', synonyms)
#  }

## -----------------------------------------------------------------------------
sessionInfo()