## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
  library(DuoClustering2018)
})

scename <- "sce_filteredExpr10_Koh"
sce <- sce_filteredExpr10_Koh()
method <- "PCAHC"

## -----------------------------------------------------------------------------
## Load parameter files. General dataset and method parameters as well as
## dataset/method-specific parameters
params <- duo_clustering_all_parameter_settings_v2()[[paste0(scename, "_", 
                                                             method)]]
params

## ----eval=FALSE---------------------------------------------------------------
# ## Set number of times to run clustering for each k
# n_rep <- 5
# 
# ## Run clustering
# set.seed(1234)
# L <- lapply(seq_len(n_rep), function(i) {  ## For each run
#   cat(paste0("run = ", i, "\n"))
#   if (method == "Seurat") {
#     tmp <- lapply(params$range_resolutions, function(resolution) {
#       ## For each resolution
#       cat(paste0("resolution = ", resolution, "\n"))
#       ## Run clustering
#       res <- get(paste0("apply_", method))(sce = sce, params = params,
#                                            resolution = resolution)
# 
#       ## Put output in data frame
#       df <- data.frame(dataset = scename,
#                        method = method,
#                        cell = names(res$cluster),
#                        run = i,
#                        k = length(unique(res$cluster)),
#                        resolution = resolution,
#                        cluster = res$cluster,
#                        stringsAsFactors = FALSE, row.names = NULL)
#       tm <- data.frame(dataset = scename,
#                        method = method,
#                        run = i,
#                        k = length(unique(res$cluster)),
#                        resolution = resolution,
#                        user.self = res$st[["user.self"]],
#                        sys.self = res$st[["sys.self"]],
#                        user.child = res$st[["user.child"]],
#                        sys.child = res$st[["sys.child"]],
#                        elapsed = res$st[["elapsed"]],
#                        stringsAsFactors = FALSE, row.names = NULL)
#       kest <- data.frame(dataset = scename,
#                          method = method,
#                          run = i,
#                          k = length(unique(res$cluster)),
#                          resolution = resolution,
#                          est_k = res$est_k,
#                          stringsAsFactors = FALSE, row.names = NULL)
#       list(clusters = df, timing = tm, kest = kest)
#     })  ## End for each resolution
#   } else {
#     tmp <- lapply(params$range_clusters, function(k) {  ## For each k
#       cat(paste0("k = ", k, "\n"))
#       ## Run clustering
#       res <- get(paste0("apply_", method))(sce = sce, params = params, k = k)
# 
#       ## Put output in data frame
#       df <- data.frame(dataset = scename,
#                        method = method,
#                        cell = names(res$cluster),
#                        run = i,
#                        k = k,
#                        resolution = NA,
#                        cluster = res$cluster,
#                        stringsAsFactors = FALSE, row.names = NULL)
#       tm <- data.frame(dataset = scename,
#                        method = method,
#                        run = i,
#                        k = k,
#                        resolution = NA,
#                        user.self = res$st[["user.self"]],
#                        sys.self = res$st[["sys.self"]],
#                        user.child = res$st[["user.child"]],
#                        sys.child = res$st[["sys.child"]],
#                        elapsed = res$st[["elapsed"]],
#                        stringsAsFactors = FALSE, row.names = NULL)
#       kest <- data.frame(dataset = scename,
#                          method = method,
#                          run = i,
#                          k = k,
#                          resolution = NA,
#                          est_k = res$est_k,
#                          stringsAsFactors = FALSE, row.names = NULL)
#       list(clusters = df, timing = tm, kest = kest)
#     })  ## End for each k
#   }
# 
#   ## Summarize across different values of k
#   assignments <- do.call(rbind, lapply(tmp, function(w) w$clusters))
#   timings <- do.call(rbind, lapply(tmp, function(w) w$timing))
#   k_estimates <- do.call(rbind, lapply(tmp, function(w) w$kest))
#   list(assignments = assignments, timings = timings, k_estimates = k_estimates)
# })  ## End for each run
# 
# ## Summarize across different runs
# assignments <- do.call(rbind, lapply(L, function(w) w$assignments))
# timings <- do.call(rbind, lapply(L, function(w) w$timings))
# k_estimates <- do.call(rbind, lapply(L, function(w) w$k_estimates))
# 
# ## Add true group for each cell
# truth <- data.frame(cell = as.character(rownames(colData(sce))),
#                     trueclass = as.character(colData(sce)$phenoid),
#                     stringsAsFactors = FALSE)
# assignments$trueclass <- truth$trueclass[match(assignments$cell, truth$cell)]
# 
# ## Combine results
# res <- list(assignments = assignments, timings = timings,
#             k_estimates = k_estimates)
# 
# df <- dplyr::full_join(res$assignments %>%
#                          dplyr::select(dataset, method, cell, run, k,
#                                        resolution, cluster, trueclass),
#                        res$k_estimates %>%
#                          dplyr::select(dataset, method, run, k,
#                                        resolution, est_k)
# ) %>% dplyr::full_join(res$timings %>% dplyr::select(dataset, method, run, k,
#                                                      resolution, elapsed))

## -----------------------------------------------------------------------------
sessionInfo()

