|
1 | 1 | #' calcInteger() |
2 | 2 | #' |
3 | | -#' Calculates the integer copy number profile for each single-cell |
| 3 | +#' Calculates the integer copy number profile for each single cell |
4 | 4 | #' |
5 | 5 | #' @param scCNA The CopyKit object. |
6 | 6 | #' @param assay String with the name of the assay to pull data from to calculate |
|
16 | 16 | #' should be parallelized. |
17 | 17 | #' |
18 | 18 | #' @details |
| 19 | +#' |
| 20 | +#' CopyKit supports the following methods for calculating integer copy number |
| 21 | +#' matrices: |
19 | 22 | #' \itemize{ |
20 | 23 | #' \item{fixed:} When method argument is set to 'fixed' copykit extracts the |
21 | 24 | #' segment means from the scCNA object and multiplies those means by the value |
22 | 25 | #' provided in the argument ploidy_value. |
| 26 | +#' |
| 27 | +#' |
| 28 | +#' \item{scquantum:} When the method argument is set to 'scquantum', CopyKit |
| 29 | +#' applies the \code{\link[scquantum]{ploidy.inference}} function to perform a |
| 30 | +#' sample-wise calculation, returning the estimated computational ploidy for |
| 31 | +#' every single cell. |
23 | 32 | #' } |
24 | 33 | #' |
25 | | -#' @return The scCNA object with an assay slot named 'integer' that contains |
26 | | -#' a data frame with cells as columns and integerized segments as rows. |
| 34 | +#' @return The CopyKit object with an assay slot named 'integer' that contains |
| 35 | +#' a data frame with cells as columns and integerized segments as rows. In |
| 36 | +#' case of method = 'scquantum', CopyKit also adds three new elements to |
| 37 | +#' \code{colData}: 'ploidy', 'confidence_ratio' and 'ploidy_score', derived |
| 38 | +#' from the scquantum output for each cell. |
| 39 | +#' |
27 | 40 | #' @export |
28 | 41 | #' |
29 | 42 | #' @importFrom S4Vectors metadata |
30 | 43 | #' @importFrom SummarizedExperiment assay colData rowRanges |
| 44 | +#' @importFrom scquantum ploidy.inference timeseries.iod |
31 | 45 | #' |
32 | 46 | #' @examples |
33 | 47 | #' copykit_obj <- mock_bincounts(ncells_diploid = 0, ncells = 10) |
34 | 48 | #' copykit_obj <- calcInteger(copykit_obj, method = "scquantum") |
35 | 49 |
|
calcInteger <- function(scCNA,
                        assay = c("bincounts",
                                  "smoothed_bincounts",
                                  "segment_ratios"),
                        method = "fixed",
                        ploidy_value = NULL,
                        name = "integer",
                        penalty = 25,
                        BPPARAM = bpparam()) {
  # Integerizes the 'segment_ratios' assay by scaling every cell (column) by
  # its ploidy, which is either supplied by the caller (method "fixed"), read
  # from colData (method "metadata") or estimated by scquantum (method
  # "scquantum"). The rounded result is stored in the assay called `name`.

  # Resolve the assay choice to a single string (defaults to "bincounts").
  assay <- match.arg(assay)

  # assay() raises an error when the requested assay does not exist, so the
  # presence of the smoothed counts is looked up by name instead of testing
  # the returned value against NULL.
  has_smoothed <-
    "smoothed_bincounts" %in% SummarizedExperiment::assayNames(scCNA)

  if (has_smoothed && assay == "bincounts" && method == "scquantum") {
    warning("CopyKit detected that knnSmooth() has been performed.
If working with knnSmooth datasets we recommend using assay 'segment_ratios':
calcInteger(copykit, assay = 'segment_ratios')")
  }

  # Bin counts backing the cell-order check and the scquantum estimates.
  # Both 'smoothed_bincounts' and 'segment_ratios' read the smoothed counts.
  bin_assay <- if (assay == "bincounts") "bincounts" else "smoothed_bincounts"

  if (bin_assay == "smoothed_bincounts" && !has_smoothed) {
    stop("Assay '", assay, "' requires the 'smoothed_bincounts' assay, ",
         "which was not found in the object.")
  }

  bin <- SummarizedExperiment::assay(scCNA, bin_assay)
  seg <- SummarizedExperiment::assay(scCNA, "segment_ratios")

  if (method == "fixed") {
    # is.numeric(NULL) is FALSE, so this rejects both a missing and a
    # non-numeric ploidy_value.
    if (!is.numeric(ploidy_value)) {
      stop("Method fixed requires a numeric value for ploidy_value.")
    }

    message("Scaling ratio values by ploidy value ",
            ploidy_value)

    # ploidy values are added to colData information
    SummarizedExperiment::colData(scCNA)$ploidy <- ploidy_value

    # saving ploidy scaling method
    S4Vectors::metadata(scCNA)$ploidy_method <- "fixed"
  }

  if (method == "metadata") {
    # method metadata just allows the segment ratios to be integerized based
    # on the values for each cell in the colData(scCNA)$ploidy information.
    if (is.null(SummarizedExperiment::colData(scCNA)$ploidy)) {
      stop("Method 'metadata' requires colData(scCNA)$ploidy information.")
    }

    message("Calculating integer values based on colData(scCNA)$ploidy info.")
  }

  # logic for scquantum method
  if (method == "scquantum") {
    if (assay == "segment_ratios") {
      # method scquantum with input segment ratios: the segmentation already
      # present in `seg` is reused, so scquantum is told not to segment again.
      sc_quants <- BiocParallel::bplapply(
        seq_len(ncol(seg)),
        function(z) {
          seg_z <- seg[, z]
          bin_z <- bin[, z]

          # segment id of every bin; a new segment starts wherever the ratio
          # changes by more than the tolerance
          segnums <- cumsum(c(TRUE, abs(diff(seg_z)) > 0.00001))

          # segment lengths are counted from the same ids so they always line
          # up with the per-segment means computed below
          seg_length <- tabulate(segnums)

          # segment-wise means of the bin counts
          seg_bins_mean <- tapply(bin_z, segnums, mean)

          # index of dispersion: one estimate over the whole cell when any
          # segment has 3 bins or fewer, otherwise one estimate per segment
          if (any(seg_length <= 3)) {
            iod_est <- scquantum::timeseries.iod(bin_z)
          } else {
            iod_est <- tapply(bin_z, segnums, scquantum::timeseries.iod)
          }

          scquantum::ploidy.inference(
            x = seg_bins_mean,
            chrom = NULL,
            start = NULL,
            end = NULL,
            seg_length = seg_length,
            iod = iod_est,
            mean_bincount = mean(bin_z),
            do_segmentation = FALSE
          )
        },
        BPPARAM = BPPARAM
      )
    } else {
      # method scquantum with input bincounts (raw or smoothed): scquantum
      # segments every cell itself using the genomic coordinates of the bins
      rg <- as.data.frame(SummarizedExperiment::rowRanges(scCNA))

      sc_quants <- BiocParallel::bplapply(
        bin,
        scquantum::ploidy.inference,
        chrom = rg$seqnames,
        start = rg$start,
        end = rg$end,
        penalty = penalty,
        BPPARAM = BPPARAM
      )
    }

    # extracting ploidies from the scquantum results
    sc_ploidies <- vapply(sc_quants, function(x) x$ploidy, numeric(1))

    # extracting confidence ratios from the scquantum results
    sc_confidence <-
      vapply(sc_quants, function(x) x$confidence_ratio, numeric(1))

    # calculating ploidy score from scquantum confidence ratio
    ploidy_score <- abs(1 - sc_confidence)

    SummarizedExperiment::colData(scCNA)$ploidy <- sc_ploidies
    SummarizedExperiment::colData(scCNA)$confidence_ratio <- sc_confidence
    SummarizedExperiment::colData(scCNA)$ploidy_score <- ploidy_score
  }

  # check to guarantee multiplication
  if (!identical(names(bin), SummarizedExperiment::colData(scCNA)$sample)) {
    stop("Order of cells in segment_ratios and colData() is not identical.")
  }

  # obtain the matrix of integer values by multiplying the seg ratios
  # by the diagonal of the ploidy colData vector. nrow is given explicitly
  # because diag() on a length-one vector would otherwise build an identity
  # matrix of that size instead of a 1 x 1 matrix.
  ploidy <- SummarizedExperiment::colData(scCNA)$ploidy
  int_values <- as.data.frame(
    round(as.matrix(seg) %*% diag(ploidy, nrow = length(ploidy)))
  )

  # recovering names
  names(int_values) <- names(seg)

  SummarizedExperiment::assay(scCNA, name) <- int_values

  scCNA
}
0 commit comments