secretome_dictionary/Capeling_Figure_2_Code.Rmd at main · Genentech/secretome_dictionary

History

652 lines (500 loc) · 23.2 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

---

title: "Capeling_Figure_2_Code.Rmd"

output: html_document

---

```{r setup, include=FALSE}

knitr::opts_chunk$set(echo = TRUE)

#Load necessary packages

library(dplyr)

library(Matrix)

library(cowplot)

library(ggplot2)

require(RColorBrewer)

library(Seurat)

require(ggpubr)

require(heatmap3)

library(reshape)

library(ComplexHeatmap)

library(ggrepel)

#Load in organoid injury model atlas

#From zipped file associated with Figure 1

# Figure 2A heatmaps ----
# Uses `ta_degs`, the DEG table saved from the glmGamPoi analysis in the
# Figure 1 code. Columns read below: name, perturbation_coarse, lfc_shrink,
# adj_pval. Re-run this section once per gene-program (GP) signature.
gp19_genes <- c('CAV1','ARL4C','SLC3A2','TNFRSF12A','CDKN2A','SPINK1','ANXA5','ARL14','ANXA1','DKK1','DRAP1','FGF2')
gp16_genes <- c('TNFSF10','IL32','DUOX2','DUOXA2','ZBP1','SAMD9L','IFIT2','CXCL11','IFIT3','TGM2','GBP5','CXCL10')
gp8_genes <- c('LCN15','CCDC60','AQP5','CD70','BAX','BCL2L1','RPS27L','GLUL','DDB2','S100A2','MDM2','LY6D')
gp6_genes <- c('SATB2','INPP4B','SLC4A4','EXOC6B','PTPRK','PLCE1','ATP8A1','LGR4','SHROOM3','HNF4G','FSIP2','THRB')

# Signature plotted on this pass; swap in gp16_genes, gp8_genes or gp6_genes
# to produce the other three panels.
gp_genes <- gp19_genes
gp_df <- ta_degs[ta_degs$name %in% gp_genes, ]

# Pivot one value column of a long DEG table into a numeric
# gene x perturbation matrix (genes as row names); gene/perturbation
# combinations that are absent from the table are filled with `fill`.
# tidyr is called by namespace because tidyverse is only attached further down.
to_gene_matrix <- function(df, value_col, fill) {
  wide <- tidyr::pivot_wider(
    as.data.frame(df)[, c("name", "perturbation_coarse", value_col)],
    names_from = "perturbation_coarse",
    values_from = dplyr::all_of(value_col)
  )
  mat <- as.matrix(wide[, -1])
  rownames(mat) <- as.character(wide$name)
  mat[is.na(mat)] <- fill
  mat
}

# Shrunken log fold changes (missing -> 0) and adjusted p-values (missing -> 1)
matrix_df <- to_gene_matrix(gp_df, "lfc_shrink", fill = 0)
significance_matrix <- to_gene_matrix(gp_df, "adj_pval", fill = 1)

# cell_fun indexes both matrices with the same [i, j], so force identical
# row/column order instead of relying on it implicitly.
significance_matrix <- significance_matrix[rownames(matrix_df), colnames(matrix_df), drop = FALSE]

# Calculate -log10(padj)
log10_pvalue_matrix <- -log10(significance_matrix)

# Fixed colour mapping from -5 to 5 so all signatures share one scale
fixed_max_val <- 5
fixed_min_val <- -5
colorMapping <- circlize::colorRamp2(c(fixed_min_val, 0, fixed_max_val), c("blue", "white", "red"))

# Desired left-to-right order of perturbations
perturbation_order <- c('DSS_Low','DSS_High','Irradiation_Low','Irradiation_High','Cytomix_4','Cytomix_1','Dissociation_4hr','Dissociation_8hr','Dissociation_24hr')
column_order <- match(perturbation_order, colnames(matrix_df))
# Fail with a readable message rather than inside Heatmap() if a perturbation
# has no rows for the selected genes.
if (anyNA(column_order)) {
  stop("Perturbation(s) missing from the DEG table for these genes: ",
       paste(perturbation_order[is.na(column_order)], collapse = ", "),
       call. = FALSE)
}

# Circle radius in mm per unit of -log10(padj); adjust according to your data
scaling_factor <- 0.03

# Heatmap of log fold changes; cells with padj <= 0.05 get an open circle whose
# radius scales with -log10(padj)
h <- Heatmap(matrix_df, name = "avg_log2FC",
             column_title = "Perturbation",
             row_title = "Gene",
             column_order = column_order,
             col = colorMapping,
             cell_fun = function(j, i, x, y, width, height, fill) {
               if (!is.na(significance_matrix[i, j]) && significance_matrix[i, j] <= 0.05) {
                 size <- unit(log10_pvalue_matrix[i, j] * scaling_factor, "mm")
                 grid.circle(x, y, r = size, gp = gpar(col = rgb(0.2, 0.2, 0.2, 0.5), fill = NA, lwd = 0.5))
               }
             })

# Representative -log10(padj) values for the circle-size legend
legend_values <- c(1, 40, 80, 120)
legend_labels <- paste0("-log10(padj) = ", legend_values)

# Scale legend values the same way as the circles in the heatmap
scaled_legend_values <- legend_values * scaling_factor

# Legend for the circle sizes
circle_legend <- Legend(at = scaled_legend_values, labels = legend_labels,
                        title = "Significance", type = "points",
                        pch = 1, size = unit(scaled_legend_values, "mm"),
                        legend_gp = gpar(col = rgb(0.2, 0.2, 0.2, 0.5), fill = NA, lwd = 0.5))

# Draw the heatmap and the legend
draw(h, annotation_legend_list = list(circle_legend))

# Figure 2A GO heatmaps ----
# `z_scores` must already be loaded: z scores for all genes associated with the
# injury-derived GPs (Table S5A). Alternatively, all calculated GO terms are
# stored in Table S5C.
# The reshaping below treats row names as modules and columns as genes.
z_scores <- as.data.frame(z_scores)

# Long format: one row per (Module, Gene) pair.
# tibble/tidyr are called by namespace because tidyverse is only attached
# further down in this file.
long_data <- z_scores %>%
  tibble::rownames_to_column(var = "Module") %>%
  tidyr::pivot_longer(-Module, names_to = "Gene", values_to = "z_score")

# Top 30 genes per module by z score (ties at the cutoff are kept by top_n)
top_genes <- long_data %>%
  group_by(Module) %>%
  top_n(30, z_score) %>%
  ungroup()

# Gene symbols of the top genes for one module label
genes_for_module <- function(module) {
  top_genes$Gene[top_genes$Module == module]
}

Mod_19 <- genes_for_module('19')
Mod_16 <- genes_for_module('16')
Mod_8 <- genes_for_module('8')
Mod_6 <- genes_for_module('6')

# Run EnrichR to establish GO terms associated with GPs ----
library(enrichR)

setEnrichrSite("Enrichr") # Human genes

# listEnrichrDbs() returning NULL is used as the "site unreachable" signal
websiteLive <- TRUE
dbs <- listEnrichrDbs()
if (is.null(dbs)) websiteLive <- FALSE
if (websiteLive) head(dbs)

# Everything below needs the enrichment result, so stop with a clear message
# instead of failing later on a missing `enriched` object.
if (!websiteLive) {
  stop("Enrichr is not reachable; cannot compute GO enrichment.", call. = FALSE)
}

# GO terms and plot for GP19 - repeat for the other 3 GPs shown
dbs <- c("GO_Molecular_Function_2023", "GO_Cellular_Component_2023", "GO_Biological_Process_2023")
enriched <- enrichr(Mod_19, dbs)
enriched[["GO_Biological_Process_2023"]]

# Quick-look bar plot of the top 20 terms; `p` is replaced by the dot plot below
p <- plotEnrich(enriched[["GO_Biological_Process_2023"]], showTerms = 20, numChar = 100, y = "Count", orderBy = "P.value")

# Pull GO terms from GO_Biological_Process_2023 (selected by name, not position)
gp19_go <- enriched[["GO_Biological_Process_2023"]]

# Modify here to run additional GPs
top_terms_gp19 <- gp19_go[c(1,2,4,7),] #Pull out 4 GO terms of interest from the top terms
#top_terms_gp16 <- gp16_go[c(3,5,6,8),] #Pull out 4 GO terms of interest from the top terms
#top_terms_gp8 <- gp8_go[c(1,2,4,6),] #Pull out 4 GO terms of interest from the top terms
#top_terms_gp6 <- gp6_go[c(1,2,3,5),] #Pull out 4 GO terms of interest from the top terms

# `Genes` is a ";"-separated string per term: count its members
top_terms_gp19$Genes_Count <- lengths(strsplit(top_terms_gp19$Genes, ";", fixed = TRUE))
top_terms_gp19$Neg_Log_Pvalue <- -log10(top_terms_gp19$Adjusted.P.value)

# Dot plot: dot size = -log10(adjusted p), dot colour = number of genes in term
p <- ggplot(top_terms_gp19, aes(x=factor(1), y=Term, size=Neg_Log_Pvalue, color=Genes_Count)) +
  geom_point(alpha=1) +
  scale_size_continuous(range = c(1, 10), limits=c(0, 10)) + # Fixed limits keep sizes comparable across GPs
  scale_color_gradient(low="white", high="red", limits=c(0, 8), na.value="gray", oob=scales::squish) +
  labs(x="", y="Term", title="GO Terms: IBD1") +
  theme_minimal() +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
  guides(size=guide_legend(title="-Log(padj)"),
         color=guide_legend(title="Number of Genes", override.aes = list(size=6)))

# Figure 2C: comparison of perturbation response in two injury models,
# DSS vs. Cytomix ----
# Requires the cytomix vs. DSS object `injury_merge` to be loaded.
# DESeq2 is then run for each perturbation + cytomix vs. cytomix-only and each
# perturbation + DSS vs. DSS-only; those DEG results are stored in
# Table S6A and S6B.

# Make the injury label the active identity so subset() can select on it
Idents(object = injury_merge) <- injury_merge$injury

# One object per injury model, analysed separately below
dss <- subset(x = injury_merge, idents = "DSS")
cytomix <- subset(x = injury_merge, idents = "Cytomix")

library(tidyverse)

library(cowplot)

library(Matrix.utils)

library(edgeR)

library(Matrix)

library(reshape2)

library(S4Vectors)

library(SingleCellExperiment)

library(pheatmap)

library(apeglm)

library(png)

library(DESeq2)

library(RColorBrewer)

library(data.table)

# Pseudobulk aggregation ----
# First run on the DSS object for all cell types, then repeat the same code on
# the cytomix object.

# The aggregated column names are split on "_" further down, so replace "_" in
# the cell-type annotations with "." to keep that split unambiguous.
dss$final_annotations <- gsub("_", ".", dss$final_annotations)

# Sample identifier: donor assignment and perturbation joined with "."
# NOTE(review): the three slot accesses below were garbled to "[email protected]" in
# the retrieved copy of this file; restored as dss@meta.data / dss@active.ident.
# Confirm against the original repository.
dss@meta.data$donor_perturbation <- paste(dss$assignment, dss$perturbation, sep=".")

# Extract raw counts and metadata to create a SingleCellExperiment object
counts <- dss[["RNA"]]$counts
metadata <- dss@meta.data

# Set up metadata as desired for aggregation and DE analysis
metadata$cluster_id <- factor(dss@active.ident)

# Create single cell experiment object
sce <- SingleCellExperiment(assays = list(counts = counts),
                            colData = metadata)

# Subset metadata to include only the variables to aggregate across
groups <- colData(sce)[, c("final_annotations", "donor_perturbation")]
head(groups)

# Aggregate (sum) counts across cluster-sample groups.
# The matrix is transposed so cell ids are rows, matching the rows of `groups`.
aggr_counts <- aggregate.Matrix(t(counts(sce)),
                                groupings = groups, fun = "sum")

# Explore output matrix
class(aggr_counts)
dim(aggr_counts)
aggr_counts[1:6, 1:6]

# Transpose aggregated matrix to have genes as rows and samples as columns
aggr_counts <- t(aggr_counts)
aggr_counts[1:6, 1:6]

# Understanding tstrsplit()
## Exploring structure of function output (list)
tstrsplit(colnames(aggr_counts), "_") %>% str()

## Comparing the first 10 elements of our input and output strings
head(colnames(aggr_counts), n = 10)
head(tstrsplit(colnames(aggr_counts), "_")[[1]], n = 10)

# Split the aggregated counts into one sub-matrix per cell type ----
cluster_names <- unique(dss$final_annotations)
cluster_names

# Cell-type part (text before the first "_") of every aggregated column name;
# computed once rather than once per cluster.
column_cluster <- tstrsplit(colnames(aggr_counts), "_")[[1]]

# One genes x samples matrix per cell type, named by cell type.
# drop = FALSE keeps a matrix even when a cell type has a single sample.
counts_ls <- lapply(cluster_names, function(cluster) {
  aggr_counts[, column_cluster == cluster, drop = FALSE]
})
names(counts_ls) <- cluster_names

# Explore the different components of the list
str(counts_ls)

# Sample-level metadata ----
# Keep only the variables that describe a sample (not an individual cell)
metadata <- as.data.frame(colData(sce))
metadata <- dplyr::select(metadata, perturbation, assignment, donor_perturbation)
dim(metadata)
head(metadata)

# Collapse to one row per distinct combination of the three variables
metadata <- unique(metadata)
dim(metadata)
head(metadata)

# Index rows by sample id
rownames(metadata) <- metadata[["donor_perturbation"]]
head(metadata)

# Cells per sample (rows) and cell type (columns).
# The name `t` is read again when the per-cluster metadata is built, so it is
# kept even though it shadows base::t as a variable.
t <- table(sce$donor_perturbation,
           sce$final_annotations)
t[1:6, 1:6]

# Per-cell-type sample metadata ----

# Build the sample metadata for one cell type from its aggregated count matrix.
# Column names are "<cell type>_<sample id>". Reads the global cell-count table
# `t` and the sample-level `metadata`.
# Returns a data frame with one row per column of `cluster_matrix`, with row
# names equal to those column names (needed later for DE).
build_cluster_metadata <- function(cluster_matrix) {
  df <- data.frame(cluster_sample_id = colnames(cluster_matrix))

  ## Separate cluster (cell type) and sample ids; split once and reuse
  id_parts <- tstrsplit(df$cluster_sample_id, "_")
  df$final_annotations <- id_parts[[1]]
  df$donor_perturbation <- id_parts[[2]]

  ## Cell counts for this cluster from the global cell count table
  idx <- which(colnames(t) == unique(df$final_annotations))
  cell_counts <- t[, idx]

  ## Remove samples with zero cells contributing to the cluster
  cell_counts <- cell_counts[cell_counts > 0]

  ## Match order of cell_counts and sample ids
  sample_order <- match(df$donor_perturbation, names(cell_counts))
  df$cell_count <- cell_counts[sample_order]

  ## Join cluster-specific metadata to the generic sample metadata
  df <- plyr::join(df, metadata,
                   by = intersect(names(df), names(metadata)))

  rownames(df) <- df$cluster_sample_id
  df
}

# lapply() handles an empty counts_ls, where 1:length() would iterate over c(1, 0)
metadata_ls <- lapply(counts_ls, build_cluster_metadata)
names(metadata_ls) <- vapply(
  metadata_ls,
  function(df) unique(df$final_annotations),
  character(1)
)

# Explore the different components of the list
str(metadata_ls)

# Double-check that both lists have same names
all(names(counts_ls) == names(metadata_ls)) #TRUE

# DESeq2 model for a single cell type ----
# Shown for goblet cells; re-run this section for every cell type.
idx <- which(names(counts_ls) == "Goblet")
cluster_counts <- counts_ls[[idx]]
cluster_metadata <- metadata_ls[[idx]]

cluster_counts[1:6, 1:6]
head(cluster_metadata)

# Count-matrix columns and metadata rows must line up one-to-one
all(colnames(cluster_counts) == rownames(cluster_metadata)) #TRUE

# DESeq2 object with perturbation as the only design term
dds <- DESeqDataSetFromMatrix(countData = cluster_counts,
                              colData = cluster_metadata,
                              design = ~ perturbation)

# rlog-transformed counts, for data visualization only
rld <- rlog(dds, blind=TRUE)

# "Sato" is the reference level every perturbation is compared against
dds$perturbation <- relevel(dds$perturbation, ref = "Sato")

# Fit the model and list the available coefficients
dds <- DESeq(dds)
resultsNames(dds)

# Shrunken DE results for every perturbation vs. Sato ----

# Cell type being processed; must match the cell type selected when `dds` was built
celltype_label <- 'Goblet'

# Factor level used in the coefficient name -> label stored in the output table.
# NOTE(review): IL22 is stored as 'Il22' (mixed case), exactly as in the
# original per-contrast code, so tables built earlier remain comparable.
contrast_labels <- c(
  BMP2 = 'BMP2',
  BMP6 = 'BMP6',
  EGF = 'EGF',
  HBEGF = 'HBEGF',
  IL22 = 'Il22',
  TGFB1 = 'TGFB1',
  Untreated = 'Untreated'
)

# Results for one "perturbation_<level>_vs_Sato" coefficient.
#   dds      fitted DESeqDataSet
#   level    perturbation level as it appears in the coefficient name
#   label    value written to the `perturbation` column
#   celltype value written to the `celltype` column
# Returns a tibble (one row per gene, sorted by padj) with apeglm-shrunken
# log2 fold changes.
shrunken_contrast_table <- function(dds, level, label, celltype) {
  coef_name <- paste0("perturbation_", level, "_vs_Sato")

  res <- results(dds,
                 name = coef_name,
                 alpha = 0.05)

  # Shrink the log2 fold changes using the apeglm method
  res <- lfcShrink(dds,
                   coef = coef_name,
                   res = res,
                   type = "apeglm")

  # Turn the DESeq2 results object into a tibble for use with tidyverse functions
  res_tbl <- res %>%
    data.frame() %>%
    tibble::rownames_to_column(var = "gene") %>%
    tibble::as_tibble() %>%
    dplyr::arrange(padj)

  res_tbl$celltype <- celltype
  res_tbl$perturbation <- label
  res_tbl
}

contrast_tables <- Map(
  function(level, label) {
    shrunken_contrast_table(dds, level, label, celltype_label)
  },
  names(contrast_labels),
  contrast_labels
)

# The accumulator is shared across cell-type runs: create it on the first run
# only, then append this cell type's contrasts.
if (!exists("Cytomix_DSS_DEGs")) {
  Cytomix_DSS_DEGs <- NULL
}
Cytomix_DSS_DEGs <- dplyr::bind_rows(Cytomix_DSS_DEGs, unname(contrast_tables))

# Total number of DEGs (upregulated only) ----
# Save the accumulated DSS results as h2h_DSS, then repeat the DE code for the
# cytomix object, cycling through cell types, and save as h2h_cyto.
# Both tables are the DESeq2 results (Table S6).

# Rows with |log2FC| > 1 and p < 0.05. which() drops comparisons that evaluate
# to NA (e.g. missing p-values), which would otherwise come back as all-NA rows.
# NOTE(review): the cutoff is applied to the raw `pvalue`, not `padj` - confirm
# this is intended.
select_degs <- function(deg_table) {
  deg_table[which(abs(deg_table$log2FoldChange) > 1 & deg_table$pvalue < 0.05), ]
}

# Upregulated subset, with a "gene-celltype-perturbation" key used to match the
# same DEG across the two injury backgrounds.
select_up_degs <- function(degs) {
  up <- degs[degs$log2FoldChange > 0, ]
  up$gene_cell_pert <- paste(up$gene, up$celltype, up$perturbation, sep="-")
  up
}

h2h_DSS <- h2h_DSS[!is.na(h2h_DSS$log2FoldChange), ]
degs_dss <- select_degs(h2h_DSS)
degs_dss_up <- select_up_degs(degs_dss)

h2h_cyto <- h2h_cyto[!is.na(h2h_cyto$log2FoldChange), ]
degs_cyto <- select_degs(h2h_cyto)
degs_cyto_up <- select_up_degs(degs_cyto)

# DEGs up in DSS ----
# Flag each DSS-up DEG by whether the same gene/cell type/perturbation is also
# up in the cytomix background. Vectorised; the original loop was missing its
# closing brace and did not parse.
degs_dss_up$in_cyto <- ifelse(
  degs_dss_up$gene_cell_pert %in% degs_cyto_up$gene_cell_pert,
  'yes',
  'no'
)

# Drop rows without a perturbation label before plotting
degs_dss_up <- degs_dss_up[!is.na(degs_dss_up$perturbation), ]

# Fill colour per cell type
cell_colors <- c('Colonocyte_Immature' = '#6666FF', 'Colonocyte_Mature' = '#00008B', 'Goblet'= '#90EE90', 'Stem'= '#FFB6C1', 'Transit_Amplifying' = '#C71585')

# Stacked DEG counts per cell type, one facet per perturbation; the outline
# line type separates DEGs shared with cytomix from DSS-only DEGs.
ggplot() +
  geom_bar(data = degs_dss_up, aes(x = celltype, color = in_cyto, fill = celltype, linetype = in_cyto)) +
  theme_bw() +
  scale_color_manual(values = c("black", "black")) +
  scale_linetype_manual(values = c("solid", "dashed")) +
  facet_grid(~ perturbation) +
  scale_fill_manual(values = cell_colors) +
  ylim(0, 500) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# DEGs up in Cyto ----
# Flag each cytomix-up DEG by whether the same gene/cell type/perturbation is
# also up in the DSS background. Vectorised; the original loop was missing its
# closing brace and did not parse.
degs_cyto_up$in_dss <- ifelse(
  degs_cyto_up$gene_cell_pert %in% degs_dss_up$gene_cell_pert,
  'yes',
  'no'
)

# Drop rows without a perturbation label before plotting
degs_cyto_up <- degs_cyto_up[!is.na(degs_cyto_up$perturbation), ]

# Mirror image of the DSS plot: same bars on a reversed y axis.
# The limits are given to scale_y_reverse() directly, because a separate
# ylim() call replaces the y scale and silently undoes the reversal.
ggplot() +
  geom_bar(data = degs_cyto_up, aes(x = celltype, color = in_dss, fill = celltype, linetype = in_dss)) +
  theme_bw() +
  scale_color_manual(values = c("black", "black")) +
  scale_linetype_manual(values = c("solid", "dashed")) +
  facet_grid(~ perturbation) +
  scale_fill_manual(values = cell_colors) +
  scale_y_reverse(limits = c(500, 0)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1))

# Figure 2D: 4-way plots comparing perturbation responses in the DSS background
# vs. the cytomix background ----
# Shown for immature colonocytes only.
immEC_cyto <- h2h_cyto[h2h_cyto[["celltype"]] == "Colonocyte_Immature", ]
immEC_dss <- h2h_DSS[h2h_DSS[["celltype"]] == "Colonocyte_Immature", ]

# Make a combined table with both cyto and DSS

# Negated %in%: TRUE for each element of `x` that has no match in `table`.
# Like %in%, it never returns NA.
`%!in%` <- function(x, table) {
  match(x, table, nomatch = 0L) == 0L
}

# Key shared by the two backgrounds: gene + perturbation
immEC_cyto$merge_id <- paste(immEC_cyto$gene, immEC_cyto$perturbation, sep="_")
immEC_dss$merge_id <- paste(immEC_dss$gene, immEC_dss$perturbation, sep="_")

# Start from the cytomix results ...
combined <- immEC_cyto[, c('gene','log2FoldChange','padj','perturbation','merge_id')]
colnames(combined) <- c('gene','log2fc_cyto','padj_cyto', 'perturbation','merge_id')

# ... and look up the matching DSS values. Columns are selected by name: the
# original positional indices (4 and 7) only pointed at log2FoldChange and padj
# for one particular column layout. Genes with no DSS match get NA.
dss_row <- match(combined$merge_id, immEC_dss$merge_id)
combined$logfc_dss <- immEC_dss$log2FoldChange[dss_row]
combined$padj_dss <- immEC_dss$padj[dss_row]

# The untreated comparison is not shown
combined <- combined[!(combined$perturbation %in% 'Untreated'), ]

# Genes responding in one background only. abs() is applied to the fold change
# itself; the original abs(x > k) took the absolute value of a logical, which
# ignored strong negative responses and did not bound the "no response" axis
# from below.
dss_specific <- dplyr::filter(combined, abs(logfc_dss) > 1.8 & abs(log2fc_cyto) < 0.2)
cyto_specific <- dplyr::filter(combined, abs(log2fc_cyto) > 2 & abs(logfc_dss) < 0.2)

# Cytomix log2FC (x) vs. DSS log2FC (y), one facet per perturbation:
# blue = DSS-specific, red = cytomix-specific, grey = everything else
ggplot(combined, aes(x =log2fc_cyto, y = logfc_dss)) +
  geom_hline(yintercept=0,linetype=2) +
  geom_vline(xintercept=0, linetype=2)+
  geom_jitter(position=position_jitter(0.2), size=0.7, color='darkgrey')+
  geom_text_repel(data=dss_specific, aes(x =log2fc_cyto, y = logfc_dss, label=gene), color='black', size=2.5, max.overlaps = 46) +
  geom_jitter(data=dss_specific, aes(x =log2fc_cyto, y = logfc_dss), color='blue', size=0.7)+
  geom_text_repel(data=cyto_specific, aes(x =log2fc_cyto, y = logfc_dss, label=gene), color='black', size=2.5, max.overlaps = 75) +
  geom_jitter(data=cyto_specific, aes(x =log2fc_cyto, y = logfc_dss), color='red', size=0.7)+
  theme_classic() +
  facet_wrap(~ perturbation) +
  ggtitle('Colonocyte DEGs')

# Dot plot of log fold changes for receptors of the perturbation subset ----
# NOTE(review): the original comment labels this panel "Figure 1E" while the
# plot title says "Figure 1H"; both differ from this file's figure number.
# Uses the per-cell-type DEG tables saved from the glmGamPoi analysis in the
# Figure 1 code (ta_degs, colono_degs, goblet_degs, stem_degs must be loaded).

# Tag each table with its cell type before stacking them
ta_degs$celltype <- 'TA'
colono_degs$celltype <- 'colono'
goblet_degs$celltype <- 'goblet'
stem_degs$celltype <- 'stem'
all_degs <- rbind(goblet_degs, ta_degs, colono_degs, stem_degs)

# Receptors/antagonists for the perturbations used in the head-to-head injury
# experiment; this order is also the y-axis order
receptor_list <- c('BMPR1A', 'ACVR1', 'ACVR2A', 'ACVR2B', 'BMPR2', 'BMPR1B', 'EGFR', 'ERBB3', 'ERBB2','LRIG1', 'IL22RA1', 'IL20RB', 'IL10RB', 'TGFBR1', 'TGFBR2')

# x-axis order: perturbations in the order below, and within each perturbation
# the cell types stem, TA, colono, goblet ("<celltype>_<perturbation>")
celltype_order <- c("stem", "TA", "colono", "goblet")
perturbation_levels <- c("DSS_Low", "DSS_High", "Irradiation_Low", "Irradiation_High", "Cytomix_4", "Cytomix_1", "Dissociation_4hr", "Dissociation_8hr", "Dissociation_24hr")
desired_order <- as.vector(outer(celltype_order, perturbation_levels, paste, sep = "_"))

# Receptor rows only, with the plotting columns added
results <- all_degs[all_degs$name %in% receptor_list, ]
results$p_value_fdr_transf <- -log10(results$adj_pval)
results$cell_perturbation <- factor(
  paste(results$celltype, results$perturbation_coarse, sep = "_"),
  levels = desired_order
)
results$name <- factor(results$name, levels = receptor_list)

# Rows that get a black outline: padj < 0.05 and |shrunken LFC| > 0.75
# (which() drops NA comparisons, as subset() does)
outlined <- results[which(results$adj_pval < 0.05 & abs(results$lfc_shrink) > 0.75), ]

# Dot plot: size = -log10(padj), fill/outline colour = shrunken LFC
p <- ggplot(results, aes(x = cell_perturbation, y = name, size = p_value_fdr_transf, fill = lfc_shrink)) +
  geom_point(alpha = 1, shape = 21, aes(color = lfc_shrink)) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
  scale_color_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
  scale_size(name = "p-val", range = c(0, 5)) +
  geom_point(data = outlined, shape = 21, color = 'black', stroke = 0.5) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 10)) +
  ggtitle('Figure 1H: logFC of Receptors')

```

## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

```{r cars}

# R Markdown template example: print a summary of the built-in `cars` data
summary(object = cars)

```

## Including Plots

You can also embed plots, for example:

```{r pressure, echo=FALSE}

# R Markdown template example: plot the built-in `pressure` data
plot(x = pressure)

```

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

Capeling_Figure_2_Code.Rmd

Latest commit

History

Capeling_Figure_2_Code.Rmd

File metadata and controls