## ----include = FALSE----------------------------------------------------------
# Global chunk options: merge each chunk's source and output into one block
# and prefix every output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----install, eval = FALSE----------------------------------------------------
#  if (!"BiocManager" %in% rownames(installed.packages()))
#      install.packages("BiocManager", repos = "https://CRAN.R-project.org")
#  BiocManager::install("cellxgenedp")

## ----setup, message = FALSE---------------------------------------------------
library(cellxgenedp)

## -----------------------------------------------------------------------------
# Pair each author row with every dataset row that shares its collection.
# Both tables carry a `collection_id` column, and multiple matches are
# expected on both sides, so the many-to-many relationship is declared
# explicitly.
author_datasets <-
    authors() |>
    left_join(
        datasets(),
        by = "collection_id",
        relationship = "many-to-many"
    )
author_datasets

## -----------------------------------------------------------------------------
# Number of author/dataset rows per (family, given) name pair, largest first.
count(author_datasets, family, given, sort = TRUE)

## ----prolific-authors---------------------------------------------------------
# The five (family, given) name pairs with the most rows in the author table.
# `sort = TRUE` orders by descending count, so the first five rows are the
# most frequent authors.
prolific_authors <- authors() |>
    count(family, given, sort = TRUE) |>
    slice_head(n = 5)
prolific_authors

## ----prolific-author-datasets-------------------------------------------------
# Restrict the author/dataset table to the prolific authors. The right join
# keeps every row of `prolific_authors` and carries its count column `n`
# onto the matching author/dataset rows.
author_datasets |>
    right_join(prolific_authors, by = c("family", "given"))

## ----specific-authors---------------------------------------------------------
# Author/dataset rows whose family name is one of three names of interest.
filter(author_datasets, family %in% c("Teichmann", "Regev", "Haniffa"))

## ----authors-of-interest------------------------------------------------------
# Identify each author of interest by both family and given name, so the
# join below matches on the full name rather than on the surname alone.
authors_of_interest <- tibble(
    family = c("Teichmann", "Regev", "Haniffa"),
    given = c("Sarah A.", "Aviv", "Muzlifah")
)

# Keep every author of interest, with the datasets matched on the full name.
author_datasets |>
    right_join(authors_of_interest, by = c("family", "given"))

## ----disease------------------------------------------------------------------
# Narrow the table to the author name, dataset identifier and disease columns.
select(author_datasets, family, given, dataset_id, disease)

## ----disease-facets-----------------------------------------------------------
# Summarise the "disease" facet of the database object returned by `db()`.
db() |>
    facets("disease")

## ----disease-facet-filter-----------------------------------------------------
# Keep the author/dataset rows whose `disease` entry has the label "COVID-19".
filter(author_datasets, facets_filter(disease, "label", "COVID-19"))

## ----disease-facet-filter-authors---------------------------------------------
# Count COVID-19-labelled author/dataset rows per author, largest first.
filter(author_datasets, facets_filter(disease, "label", "COVID-19")) |>
    count(family, given, sort = TRUE)

## ----disease-unnest-----------------------------------------------------------
# Flatten the nested `disease` column into ordinary columns:
# `unnest_longer()` yields one row per disease entry, then `unnest_wider()`
# turns each entry's fields into columns of their own.
author_dataset_diseases <-
    select(author_datasets, family, given, dataset_id, disease) |>
    tidyr::unnest_longer(disease) |>
    tidyr::unnest_wider(disease)
author_dataset_diseases

## ----covid-19, eval = FALSE---------------------------------------------------
#  author_dataset_diseases |>
#      filter(label == "COVID-19")
#  
#  author_dataset_diseases |>
#      filter(label == "COVID-19") |>
#      count(family, given, sort = TRUE)

## -----------------------------------------------------------------------------
# Cache the author table. The variable deliberately shares its name with the
# `authors()` function; R skips non-function bindings when resolving a call,
# so `authors()` remains callable afterwards.
authors <- authors()
authors

## -----------------------------------------------------------------------------
# TRUE only when no row of the author table is an exact duplicate of another.
nrow(distinct(authors)) == nrow(authors)

## -----------------------------------------------------------------------------
# Author records that occur more than once within the same collection.
count(authors, collection_id, family, given, consortium, sort = TRUE) |>
    filter(n > 1)

## -----------------------------------------------------------------------------
# Look up the collection record for one specific collection id.
# NOTE(review): presumably a collection flagged by the duplicate count;
# confirm against the rendered output.
duplicate_authors <- filter(
    collections(),
    collection_id == "e5f58829-1a66-40b5-a624-9046778e74f5"
)
duplicate_authors

## -----------------------------------------------------------------------------
# Extract the `publisher_metadata` column of the selected collection. Inside
# `pull()` the name refers to the column; the result is then stored under
# the same name.
publisher_metadata <- pull(duplicate_authors, publisher_metadata)

## -----------------------------------------------------------------------------
# Field names of the first (presumably only) metadata entry.
publisher_metadata[[1]] |>
    names()

## -----------------------------------------------------------------------------
# Number of author entries recorded in that metadata.
publisher_metadata[[1]][["authors"]] |>
    length()

## -----------------------------------------------------------------------------
# Drop exact duplicate rows from the author table.
deduplicated_authors <-
    authors |>
    distinct()

## ----sessionInfo, echo = FALSE------------------------------------------------
# Record the R version and attached packages for reproducibility.
utils::sessionInfo()