Skip to content

Commit 55a97a4

Browse files
Merge pull request #92 from navinlabcode/devel
Devel
2 parents 9a6c6c8 + 068e703 commit 55a97a4

17 files changed

Lines changed: 450 additions & 152 deletions

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ Package: copykit
22
Title: CopyKit
33
URL: https://github.com/navinlabcode/copykit
44
BugReports: https://github.com/navinlabcode/copykit/issues
5-
Version: 0.1.1
5+
Version: 0.1.2
66
Authors@R: c(
77
person(given = "Darlan",
88
family = "Conterno Minussi",
@@ -35,7 +35,6 @@ Suggests:
3535
testthat,
3636
spelling,
3737
knitr,
38-
scquantum,
3938
rmarkdown,
4039
stringr
4140
Language: en-US
@@ -77,7 +76,8 @@ Imports:
7776
ggtree,
7877
methods,
7978
withr,
80-
bluster
79+
bluster,
80+
scquantum
8181
RoxygenNote: 7.1.2
8282
VignetteBuilder: knitr
8383
Depends:

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ export(find_scaffold_genes)
3030
export(graph)
3131
export(hdbscanCBI)
3232
export(inferMrca)
33+
export(knnSmooth)
3334
export(l2e.normal.sd)
3435
export(leidenCBI)
3536
export(logNorm)
@@ -96,6 +97,7 @@ importClassesFrom(SingleCellExperiment,SingleCellExperiment)
9697
importClassesFrom(SummarizedExperiment,RangedSummarizedExperiment)
9798
importFrom(BiocGenerics,`%in%`)
9899
importFrom(BiocGenerics,subset)
100+
importFrom(BiocNeighbors,findKNN)
99101
importFrom(BiocParallel,bplapply)
100102
importFrom(BiocParallel,bpparam)
101103
importFrom(DNAcopy,CNA)
@@ -164,6 +166,8 @@ importFrom(miniUI,miniPage)
164166
importFrom(mixtools,normalmixEM)
165167
importFrom(scales,hue_pal)
166168
importFrom(scales,percent_format)
169+
importFrom(scquantum,ploidy.inference)
170+
importFrom(scquantum,timeseries.iod)
167171
importFrom(shiny,checkboxGroupInput)
168172
importFrom(shiny,fillCol)
169173
importFrom(shiny,plotOutput)

NEWS.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,28 @@
11
# copykit 0.1.2
22

3+
**New features**
4+
* New cell smoothing method with the function `knnSmooth()`. Uses k-nearest neighbors to smooth cell profiles and re-segments the datasets, obtaining cleaner copy number profiles with reduced overdispersion and improving downstream analysis. (Thanks to [Runmin Wei](https://github.com/WandeRum) for the helpful discussion.)
5+
* scquantum method is available for `calcInteger()` and is now a CopyKit import ([scquantum](https://github.com/navinlabcode/scquantum) is a single cell ploidy estimation tool developed by [Alexander Davis](https://github.com/alex-l-m))
6+
* `calcInteger()` now accepts option method = 'metadata'. To use this option the user can add custom values of ploidy to every cell in the colData column 'ploidy' and run `calcInteger(ck, method = 'metadata')` to obtain the integer matrix on the CopyKit object.
7+
* runVst allows selection of the assay for the transformation
8+
9+
**Changes**
10+
* `plotHeatmap()` order_cells argument now defaults to NULL. NULL option respects the order of the CopyKit object. order_cells argument can be set to 'consensus_tree' and 'hclust'.
11+
12+
* Method 'scquantum' from `calcInteger()` adds 3 elements to the colData.
13+
1. ploidy: containing the inferred ploidy call for each cell
14+
2. confidence_ratio: ratio from scquantum inferred ploidy to scquantum theoretical ploidy
15+
3. ploidy_score: Score derived from the confidence ratio. Values closer to 0 indicate a better fit of the ploidy call
16+
17+
* Significance thresholds for CBS alpha segmentation and Merge levels were reduced to increase sensitivity to focal amplifications.
18+
19+
**Removed**
20+
* option 'phylogeny' from `plotHeatmap()` argument 'order_cells' has been removed.
21+
22+
**Bug Fixes**
323
* Fixed error in plotGeneCopy not returning plots with geom violin and barplot. (Thanks to @Romeo1-1)
424
* Fixed error in plotGeneCopy with duplicated sample names on a merged object. Now it warns the user of merged sample names. (Thanks to @Romeo1-1)
25+
* Allowing control of parameter merge_levels_alpha on `runVarbin()` and `runSegmentation()` to control the significance level used when merging non-significant segments.
526

627

728
# copykit 0.1.1

R/calcInteger.R

Lines changed: 150 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#' calcInteger()
22
#'
3-
#' Calculates the integer copy number profile for each single-cell
3+
#' Calculates the integer copy number profile for each single cell
44
#'
55
#' @param scCNA The CopyKit object.
66
#' @param assay String with the name of the assay to pull data from to calculate
@@ -16,97 +16,187 @@
1616
#' should be parallelized.
1717
#'
1818
#' @details
19+
#'
20+
#' CopyKit supports the following methods for calculating integer copy number
21+
#' matrices
1922
#' \itemize{
2023
#' \item{fixed:} When method argument is set to 'fixed' copykit extracts the
2124
#' segment means from the scCNA object and multiplies those means by the value
2225
#' provided in the argument ploidy_value.
26+
#'
27+
#'
28+
#' \item{scquantum:} When the method argument is set to 'scquantum', CopyKit
29+
#' applies \code{\link[scquantum]{ploidy.inference}} function to perform a
30+
#' sample wise calculation returning the estimated computational ploidy for
31+
#' every single cell
2332
#' }
2433
#'
25-
#' @return The scCNA object with an assay slot named 'integer' that contains
26-
#' a data frame with cells as columns and integerized segments as rows.
34+
#' @return The CopyKit object with an assay slot named 'integer' that contains
35+
#' a data frame with cells as columns and integerized segments as rows. And, in
36+
#' case of method = 'scquantum' CopyKit adds three new elements to \code{colData}
37+
#' named 'ploidy', 'ploidy_score' and 'confidence_ratio', obtained from
38+
#' scquantum for each cell.
39+
#'
2740
#' @export
2841
#'
2942
#' @importFrom S4Vectors metadata
3043
#' @importFrom SummarizedExperiment assay colData rowRanges
44+
#' @importFrom scquantum ploidy.inference timeseries.iod
3145
#'
3246
#' @examples
3347
#' copykit_obj <- mock_bincounts(ncells_diploid = 0, ncells = 10)
3448
#' copykit_obj <- calcInteger(copykit_obj, method = "scquantum")
3549

3650
calcInteger <- function(scCNA,
37-
assay = "bincounts",
51+
assay = c("bincounts",
52+
'smoothed_bincounts',
53+
"segment_ratios"),
3854
method = "fixed",
3955
ploidy_value = NULL,
4056
name = "integer",
4157
penalty = 25,
4258
BPPARAM = bpparam()) {
59+
# args
60+
assay = match.arg(assay)
61+
62+
if (!is.null(assay(scCNA, 'smoothed_bincounts')) && assay == 'bincounts'
63+
&& method == 'scquantum') {
64+
warning("CopyKit detected that knnSmooth() has been performed.
65+
If working with knnSmooth datasets we recommend using assay 'segment_ratios':
66+
calcInteger(copykit, assay = 'segment_ratios')")
67+
}
68+
69+
# getting datasets
70+
if (assay == 'bincounts') {
71+
bin <- SummarizedExperiment::assay(scCNA, 'bincounts')
72+
}
73+
74+
if (assay %in% c('smoothed_bincounts','segment_ratios')) {
75+
bin <- SummarizedExperiment::assay(scCNA, 'smoothed_bincounts')
76+
}
77+
78+
seg <- SummarizedExperiment::assay(scCNA, 'segment_ratios')
79+
80+
if (!is.null(ploidy_value)) {
81+
if (method == "fixed") {
82+
if (is.null(ploidy_value) && !is.numeric(ploidy_value)) {
83+
stop("Method fixed requires a numeric value for ploidy_value.")
84+
}
85+
86+
message("Scaling ratio values by ploidy value ",
87+
ploidy_value)
88+
89+
# ploidy values are added to colData information
90+
SummarizedExperiment::colData(scCNA)$ploidy <-
91+
ploidy_value
92+
93+
# saving ploidy scaling method
94+
S4Vectors::metadata(scCNA)$ploidy_method <- "fixed"
95+
}
96+
}
97+
98+
if (method == "metadata") {
99+
# method metadata just allows the segment ratios to be integerized based
100+
# on the values for each cell in the colData(scCNA)$ploidy information.
101+
if (!is.null(colData(scCNA)$ploidy)) {
102+
message('Calculating integer values based on colData(scCNA)$ploidy info.')
103+
} else {
104+
stop("Method 'metadata' requires colData(scCNA)$ploidy information.")
105+
}
43106

44-
df <- SummarizedExperiment::assay(scCNA, assay)
45-
seg <- SummarizedExperiment::assay(scCNA, 'segment_ratios')
46-
107+
}
108+
109+
# logic for scquantum method
110+
if (method == "scquantum") {
111+
rg <- as.data.frame(SummarizedExperiment::rowRanges(scCNA))
112+
113+
# method scquantum with input bincounts
114+
if (assay %in% c('bincounts', 'smoothed_bincounts')) {
115+
sc_quants <-
116+
BiocParallel::bplapply(
117+
assay(scCNA, assay),
118+
scquantum::ploidy.inference,
119+
chrom = rg$seqnames,
120+
start = rg$start,
121+
end = rg$end,
122+
penalty = penalty,
123+
BPPARAM = BPPARAM
124+
)
125+
}
47126

48-
if (!is.null(ploidy_value)) {
49-
if (method == "fixed") {
50-
if (is.null(ploidy_value) && !is.numeric(ploidy_value)) {
51-
stop("Method fixed requires a numeric value for ploidy_value.")
52-
}
127+
# method scquantum with input segment ratios
128+
if (assay == 'segment_ratios') {
53129

54-
message("Scaling ratio values by ploidy value ",
55-
ploidy_value)
130+
sc_quants <- BiocParallel::bplapply(seq_along(seg), function(z) {
131+
# extracting segments rle id and lengths
132+
segnums <- cumsum(c(TRUE, abs(diff(seg[, z])) > 0.00001))
133+
seg_length <- rle(seg[, z])$lengths
56134

57-
# ploidy values are added to colData information
58-
SummarizedExperiment::colData(scCNA)$ploidy <-
59-
ploidy_value
135+
# extracting segment-wise means and index of dispersion
136+
seg_bins_mean <- tapply(bin[, z], segnums, mean)
60137

61-
# saving ploidy scaling method
62-
S4Vectors::metadata(scCNA)$ploidy_method <- "fixed"
138+
if (any(seg_length <= 3)) {
139+
iod.est <- scquantum::timeseries.iod(bin[,z])
140+
} else {
141+
iod.est <-
142+
tapply(bin[, z], segnums, scquantum::timeseries.iod)
63143
}
64-
}
65144

66-
if (method == "scquantum") {
67-
rg <- as.data.frame(SummarizedExperiment::rowRanges(scCNA))
68-
69-
sc_quants <-
70-
BiocParallel::bplapply(
71-
assay(scCNA, assay),
72-
scquantum::ploidy.inference,
73-
chrom = rg$seqnames,
74-
start = rg$start,
75-
end = rg$end,
76-
penalty = penalty,
77-
BPPARAM = BPPARAM
78-
)
79-
80-
# extracting ploidies from sc_quantum object
81-
sc_ploidies <-
82-
vapply(sc_quants, function(x)
83-
x$ploidy, numeric(1))
84-
85-
# extracting ploidies from sc_quantum object
86-
sc_confidence <-
87-
vapply(sc_quants, function(x)
88-
x$confidence_ratio, numeric(1))
89-
90-
SummarizedExperiment::colData(scCNA)$ploidy <- sc_ploidies
91-
SummarizedExperiment::colData(scCNA)$ploidy_confidence <-
92-
sc_confidence
93-
}
145+
# bincount mean estimate
146+
mean.est <- mean(bin[, z])
147+
148+
estimates <- scquantum::ploidy.inference(
149+
x = seg_bins_mean,
150+
chrom = NULL,
151+
start = NULL,
152+
end = NULL,
153+
seg_length = seg_length,
154+
iod = iod.est,
155+
mean_bincount = mean.est,
156+
do_segmentation = FALSE
157+
)
158+
159+
})
160+
94161

95-
# check to guarantee multiplication
96-
if (!identical(names(df), colData(scCNA)$sample)) {
97-
stop("Order of cells in segment_ratios and colData() is not identical.")
98162
}
99163

100-
# obtain the matrix of integer values by multiplying the seg ratios
101-
# by the diagonal of the ploidy colData vector
102-
int_values <-
103-
round(as.matrix(seg) %*% diag(colData(scCNA)$ploidy)) %>%
104-
as.data.frame()
164+
# extracting ploidies from sc_quantum object
165+
sc_ploidies <-
166+
vapply(sc_quants, function(x)
167+
x$ploidy, numeric(1))
168+
169+
# extracting confidence ratios from sc_quantum object
170+
sc_confidence <-
171+
vapply(sc_quants, function(x)
172+
x$confidence_ratio, numeric(1))
173+
174+
# calculating ploidy score from scquantum confidence ratio
175+
ploidy_score <- abs(1-sc_confidence)
176+
177+
SummarizedExperiment::colData(scCNA)$ploidy <- sc_ploidies
178+
SummarizedExperiment::colData(scCNA)$confidence_ratio <-
179+
sc_confidence
180+
SummarizedExperiment::colData(scCNA)$ploidy_score <-
181+
ploidy_score
182+
183+
}
184+
185+
# check to guarantee multiplication
186+
if (!identical(names(bin), colData(scCNA)$sample)) {
187+
stop("Order of cells in segment_ratios and colData() is not identical.")
188+
}
189+
190+
# obtain the matrix of integer values by multiplying the seg ratios
191+
# by the diagonal of the ploidy colData vector
192+
int_values <-
193+
round(as.matrix(seg) %*% diag(colData(scCNA)$ploidy)) %>%
194+
as.data.frame()
105195

106-
# recovering names
107-
names(int_values) <- names(seg)
196+
# recovering names
197+
names(int_values) <- names(seg)
108198

109-
SummarizedExperiment::assay(scCNA, name) <- int_values
199+
SummarizedExperiment::assay(scCNA, name) <- int_values
110200

111-
return(scCNA)
201+
return(scCNA)
112202
}

0 commit comments

Comments
 (0)