navinlabcode
diff --git a/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions b/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 4 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎NEWS.md‎
Lines changed: 21 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎R/calcInteger.R‎
Lines changed: 150 additions & 60 deletions b/‎R/calcInteger.R‎
Lines changed: 150 additions & 60 deletions
@@ -2,7 +2,7 @@ Package: copykit
 Title: CopyKit
 URL: https://github.com/navinlabcode/copykit
 BugReports: https://github.com/navinlabcode/copykit/issues
-Version: 0.1.1
+Version: 0.1.2
 Authors@R: c(
     person(given = "Darlan",
            family = "Conterno Minussi",
@@ -35,7 +35,6 @@ Suggests:
     testthat,
     spelling,
     knitr,
-    scquantum,
     rmarkdown,
     stringr
 Language: en-US
@@ -77,7 +76,8 @@ Imports:
     ggtree,
     methods,
     withr,
-    bluster
+    bluster,
+    scquantum
 RoxygenNote: 7.1.2
 VignetteBuilder: knitr
 Depends: 
 
@@ -30,6 +30,7 @@ export(find_scaffold_genes)
 export(graph)
 export(hdbscanCBI)
 export(inferMrca)
+export(knnSmooth)
 export(l2e.normal.sd)
 export(leidenCBI)
 export(logNorm)
@@ -96,6 +97,7 @@ importClassesFrom(SingleCellExperiment,SingleCellExperiment)
 importClassesFrom(SummarizedExperiment,RangedSummarizedExperiment)
 importFrom(BiocGenerics,`%in%`)
 importFrom(BiocGenerics,subset)
+importFrom(BiocNeighbors,findKNN)
 importFrom(BiocParallel,bplapply)
 importFrom(BiocParallel,bpparam)
 importFrom(DNAcopy,CNA)
@@ -164,6 +166,8 @@ importFrom(miniUI,miniPage)
 importFrom(mixtools,normalmixEM)
 importFrom(scales,hue_pal)
 importFrom(scales,percent_format)
+importFrom(scquantum,ploidy.inference)
+importFrom(scquantum,timeseries.iod)
 importFrom(shiny,checkboxGroupInput)
 importFrom(shiny,fillCol)
 importFrom(shiny,plotOutput)
 
@@ -1,7 +1,28 @@
 # copykit 0.1.2
 
+**New features**
+* New cell smoothing method with the function `knnSmooth()`. Uses k-nearest neighbors to smooth cell profiles and re-segments the dataset, obtaining cleaner copy number profiles with reduced overdispersion and improving downstream analysis. (Thanks to [Runmin Wei](https://github.com/WandeRum) for the helpful discussion.)
+* scquantum method is available for `calcInteger()` and is now a CopyKit import ([scquantum](https://github.com/navinlabcode/scquantum) is a single cell ploidy estimation tool developed by [Alexander Davis](https://github.com/alex-l-m))
+* `calcInteger()` now accepts the option `method = 'metadata'`. To use this option the user can add custom values of ploidy to every cell in the colData column 'ploidy' and run `calcInteger(ck, method = 'metadata')` to obtain the integer matrix on the CopyKit object.
+* runVst allows selection of the assay for the transformation
+
+**Changes**
+* `plotHeatmap()` order_cells argument now defaults to NULL. NULL option respects the order of the CopyKit object. order_cells argument can be set to 'consensus_tree' and 'hclust'.
+
+* Method 'scquantum' from `calcInteger()` adds 3 elements to the colData.
+1. ploidy: containing the inferred ploidy call for each cell
+2. confidence_ratio: ratio from scquantum inferred ploidy to scquantum theoretical ploidy
+3. ploidy_score: Score derived from the confidence ratio. Values closer to 0 indicate a better fit of the ploidy call
+
+* Significance thresholds for CBS alpha segmentation and Merge levels were reduced to increase sensitivity to focal amplifications.
+
+**Removed**
+* option 'phylogeny' from the `plotHeatmap()` argument 'order_cells' has been removed.
+
+**Bug Fixes**
 * Fixed error in plotGeneCopy not returning plots with geom violin and barplot. (Thanks to @Romeo1-1)
 * Fixed error in plotGeneCopy with duplicated sample names on a merged object. Now it warns the user of merged sample names. (Thanks to @Romeo1-1)
+* Allowing control of parameter merge_levels_alpha on `runVarbin()` and `runSegmentation()` to control the significance level of merge levels when merging non-significant segments.
 
 
 # copykit 0.1.1
 
@@ -1,6 +1,6 @@
 #' calcInteger()
 #'
-#' Calculates the integer copy number profile for each single-cell
+#' Calculates the integer copy number profile for each single cell
 #'
 #' @param scCNA The CopyKit object.
 #' @param assay String with the name of the assay to pull data from to calculate
@@ -16,97 +16,187 @@
 #' should be parallelized.
 #'
 #' @details
+#'
+#' CopyKit supports the following methods for calculating integer copy number
+#' matrices
 #' \itemize{
 #' \item{fixed:} When method argument is set to 'fixed' copykit extracts the
 #' segment means from the scCNA object and multiplies those means by the value
 #' provided in the argument ploidy_value.
+#'
+#'
+#' \item{scquantum:} When the method argument is set to 'scquantum', CopyKit
+#' applies \code{\link[scquantum]{ploidy.inference}} function to perform a
+#' sample-wise calculation returning the estimated computational ploidy for
+#' every single cell
 #' }
 #'
-#' @return The scCNA object with an assay slot named 'integer' that contains
-#' a data frame with cells as columns and integerized segments as rows.
+#' @return The CopyKit object with an assay slot named 'integer' that contains
+#' a data frame with cells as columns and integerized segments as rows. In
+#' case of method = 'scquantum', CopyKit also adds three new elements to
+#' \code{colData} for each cell, named 'ploidy', 'confidence_ratio' and
+#' 'ploidy_score'.
+#'
 #' @export
 #'
 #' @importFrom S4Vectors metadata
 #' @importFrom SummarizedExperiment assay colData rowRanges
+#' @importFrom scquantum ploidy.inference timeseries.iod
 #'
 #' @examples
 #' copykit_obj <- mock_bincounts(ncells_diploid = 0, ncells = 10)
 #' copykit_obj <- calcInteger(copykit_obj, method = "scquantum")
 
 calcInteger <- function(scCNA,
-                        assay = "bincounts",
+                        assay = c("bincounts",
+                                  'smoothed_bincounts',
+                                  "segment_ratios"),
                         method = "fixed",
                         ploidy_value = NULL,
                         name = "integer",
                         penalty = 25,
                         BPPARAM = bpparam()) {
+  # args
+  assay = match.arg(assay)
+
+  if (!is.null(assay(scCNA, 'smoothed_bincounts')) && assay == 'bincounts'
+      && method == 'scquantum') {
+    warning("CopyKit detected that knnSmooth() has been performed.
+If working with knnSmooth datasets we recommend using assay 'segment_ratios':
+calcInteger(copykit, assay = 'segment_ratios')")
+  }
+
+  # getting datasets
+  if (assay == 'bincounts') {
+    bin <- SummarizedExperiment::assay(scCNA, 'bincounts')
+  }
+
+  if (assay %in% c('smoothed_bincounts','segment_ratios')) {
+    bin <- SummarizedExperiment::assay(scCNA, 'smoothed_bincounts')
+  }
+
+  seg <- SummarizedExperiment::assay(scCNA, 'segment_ratios')
+
+  if (!is.null(ploidy_value)) {
+    if (method == "fixed") {
+      if (is.null(ploidy_value) && !is.numeric(ploidy_value)) {
+        stop("Method fixed requires a numeric value for ploidy_value.")
+      }
+
+      message("Scaling ratio values by ploidy value ",
+              ploidy_value)
+
+      # ploidy values are added to colData information
+      SummarizedExperiment::colData(scCNA)$ploidy <-
+        ploidy_value
+
+      # saving ploidy scaling method
+      S4Vectors::metadata(scCNA)$ploidy_method <- "fixed"
+    }
+  }
+
+  if (method == "metadata") {
+    # method metadata just allows the segment ratios to be integerized based
+    # on the values for each cell in the colData(scCNA)$ploidy information.
+    if (!is.null(colData(scCNA)$ploidy)) {
+      message('Calculating integer values based on colData(scCNA)$ploidy info.')
+    } else {
+      stop("Method 'metadata' requires colData(scCNA)$ploidy information.")
+    }
 
-    df <- SummarizedExperiment::assay(scCNA, assay)
-    seg <- SummarizedExperiment::assay(scCNA, 'segment_ratios')
-
+  }
+
+  # logic for scquantum method
+  if (method == "scquantum") {
+    rg <- as.data.frame(SummarizedExperiment::rowRanges(scCNA))
+
+    # method scquantum with input bincounts
+    if (assay %in% c('bincounts', 'smoothed_bincounts')) {
+      sc_quants <-
+        BiocParallel::bplapply(
+          assay(scCNA, assay),
+          scquantum::ploidy.inference,
+          chrom = rg$seqnames,
+          start = rg$start,
+          end = rg$end,
+          penalty = penalty,
+          BPPARAM = BPPARAM
+        )
+    }
 
-    if (!is.null(ploidy_value)) {
-        if (method == "fixed") {
-            if (is.null(ploidy_value) && !is.numeric(ploidy_value)) {
-                stop("Method fixed requires a numeric value for ploidy_value.")
-            }
+    # method scquantum with input segment ratios
+    if (assay == 'segment_ratios') {
 
-            message("Scaling ratio values by ploidy value ",
-                    ploidy_value)
+      sc_quants <- BiocParallel::bplapply(seq_along(seg), function(z) {
+        # extracting segments rle id and lengths
+        segnums <- cumsum(c(TRUE, abs(diff(seg[, z])) > 0.00001))
+        seg_length <- rle(seg[, z])$lengths
 
-            # ploidy values are added to colData information
-            SummarizedExperiment::colData(scCNA)$ploidy <-
-                ploidy_value
+        # extracting segment-wise means and index of dispersion
+        seg_bins_mean <- tapply(bin[, z], segnums, mean)
 
-            # saving ploidy scaling method
-            S4Vectors::metadata(scCNA)$ploidy_method <- "fixed"
+        if (any(seg_length <= 3)) {
+          iod.est <- scquantum::timeseries.iod(bin[,z])
+        } else {
+          iod.est <-
+            tapply(bin[, z], segnums, scquantum::timeseries.iod)
         }
-    }
 
-    if (method == "scquantum") {
-        rg <- as.data.frame(SummarizedExperiment::rowRanges(scCNA))
-
-        sc_quants <-
-            BiocParallel::bplapply(
-                assay(scCNA, assay),
-                scquantum::ploidy.inference,
-                chrom = rg$seqnames,
-                start = rg$start,
-                end = rg$end,
-                penalty = penalty,
-                BPPARAM = BPPARAM
-            )
-
-        # extracting ploidies from sc_quantum object
-        sc_ploidies <-
-            vapply(sc_quants, function(x)
-                x$ploidy, numeric(1))
-
-        # extracting ploidies from sc_quantum object
-        sc_confidence <-
-            vapply(sc_quants, function(x)
-                x$confidence_ratio, numeric(1))
-
-        SummarizedExperiment::colData(scCNA)$ploidy <- sc_ploidies
-        SummarizedExperiment::colData(scCNA)$ploidy_confidence <-
-            sc_confidence
-    }
+        # bincount mean estimate
+        mean.est <- mean(bin[, z])
+
+        estimates <- scquantum::ploidy.inference(
+          x = seg_bins_mean,
+          chrom = NULL,
+          start = NULL,
+          end = NULL,
+          seg_length = seg_length,
+          iod = iod.est,
+          mean_bincount = mean.est,
+          do_segmentation = FALSE
+        )
+
+      })
+
 
-    # check to guarantee multiplication
-    if (!identical(names(df), colData(scCNA)$sample)) {
-        stop("Order of cells in segment_ratios and colData() is not identical.")
     }
 
-    # obtain the matrix of integer values by multiplying the seg ratios
-    # by the diagonal of the ploidy colData vector
-    int_values <-
-        round(as.matrix(seg) %*% diag(colData(scCNA)$ploidy)) %>%
-        as.data.frame()
+    # extracting ploidies from sc_quantum object
+    sc_ploidies <-
+      vapply(sc_quants, function(x)
+        x$ploidy, numeric(1))
+
+    # extracting confidence ratios from sc_quantum object
+    sc_confidence <-
+      vapply(sc_quants, function(x)
+        x$confidence_ratio, numeric(1))
+
+    # calculating ploidy score from scquantum confidence ratio
+    ploidy_score <- abs(1-sc_confidence)
+
+    SummarizedExperiment::colData(scCNA)$ploidy <- sc_ploidies
+    SummarizedExperiment::colData(scCNA)$confidence_ratio <-
+      sc_confidence
+    SummarizedExperiment::colData(scCNA)$ploidy_score <-
+      ploidy_score
+
+  }
+
+  # check to guarantee multiplication
+  if (!identical(names(bin), colData(scCNA)$sample)) {
+    stop("Order of cells in segment_ratios and colData() is not identical.")
+  }
+
+  # obtain the matrix of integer values by multiplying the seg ratios
+  # by the diagonal of the ploidy colData vector
+  int_values <-
+    round(as.matrix(seg) %*% diag(colData(scCNA)$ploidy)) %>%
+    as.data.frame()
 
-    # recovering names
-    names(int_values) <- names(seg)
+  # recovering names
+  names(int_values) <- names(seg)
 
-    SummarizedExperiment::assay(scCNA, name) <- int_values
+  SummarizedExperiment::assay(scCNA, name) <- int_values
 
-    return(scCNA)
+  return(scCNA)
 }