-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGSE21307.R
More file actions
58 lines (43 loc) · 1.74 KB
/
GSE21307.R
File metadata and controls
58 lines (43 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# R script for GSE21307, made for Rik Lahaijes bachelor thesis 2020
# NOTE(review): machine-specific working directory; every relative file path
# used in this script resolves against it.
setwd("C:/Users/rik-l/OneDrive/Documenten/GSE21307")

# Read the gene expression values per sample from the series matrix file.
# The first 82 lines are skipped and the first column supplies the row names.
dat <- read.delim(
  "GSE21307_series_matrix.txt",
  skip = 82,
  row.names = 1,
  as.is = TRUE
)

# Remove the final row: it marks the end of the matrix table and holds no data.
dat <- dat[-nrow(dat), ]
# Load the description of the sample groups from the GSE21307 series matrix:
# skip the first 35 lines and read 10 rows of the sample metadata section.
desc <- read.delim("GSE21307_series_matrix.txt", as.is = TRUE, skip = 35,
                   nrows = 10)
# Use the first metadata row as column names (presumably the sample
# accessions, which is what the alignment check below relies on -- TODO
# confirm against the file).
colnames(desc) <- desc[1, ]
# Keep only row 9 (the "disease state: ..." field) and drop the first column,
# which is not a sample. drop = FALSE keeps a data frame whatever the number
# of samples.
desc <- desc[9, -1, drop = FALSE]
# Strip the field prefix so that only the group label remains.
desc[1, ] <- sub("disease state: ", "", desc[1, ])
# Transpose to one row per sample with a single factor column "group".
desc <- as.data.frame(t(desc))
colnames(desc) <- "group"
desc[, 1] <- as.factor(desc[, 1])
# The samples (columns) of dat must be in the same order as the rows of desc,
# otherwise the group labels would be attached to the wrong samples. After the
# transpose the sample names are the row names of desc, not its column names.
if (!identical(colnames(dat), rownames(desc))) {
  stop("Sample names in 'dat' and 'desc' do not match.", call. = FALSE)
}
# Replace the literal string "NULL" with NA before the numeric conversion.
dat[dat == "NULL"] <- NA
# Convert every column to numeric in one step. Unlike a 1:ncol(dat) loop this
# is also safe when dat has no columns and leaves no loop index behind;
# assigning into dat[] keeps the data frame with its row and column names.
dat[] <- lapply(dat, as.numeric)
# Load the annotation table containing the gene names; the first 20 lines of
# the file are skipped.
anno <- read.delim("GPL4134-20425.txt", skip = 20, as.is = TRUE)
# Tabulate how many values are missing per row of dat.
table(rowSums(is.na(dat)))
# Keep only the rows of dat without any missing value. The same row filter is
# applied to anno first, while dat is still unfiltered.
# NOTE(review): this assumes anno has one row per row of dat, in the same
# order -- confirm against the annotation file.
anno <- anno[complete.cases(dat), ]
dat <- dat[complete.cases(dat), ]
# Example figure: box plot with one box per column of dat, written to dat.png.
png("dat.png", width = 1000, height = 1000, pointsize = 24)
# Close the PNG device even when boxplot() fails, so that a failed plot does
# not leave the device open and silently capture later plots.
invisible(tryCatch(boxplot(dat), finally = dev.off()))
# Load the already existing helper code for the statistical analysis from a
# local file.
source("functions_ArrayAnalysis_v2.R")
# List the objects now present in the workspace.
ls()
# Create the quality-control plots. createQCPlots is not defined in this
# script (presumably it comes from the file sourced above -- TODO confirm).
# It is called with the expression data, the column name "group" and the
# sample description table.
createQCPlots(dat,"group", desc)
# Statistical analysis with limma.
library(limma)
# Design matrix with an intercept and the sample group as the only factor.
design <- model.matrix(~ group, data = desc)
# Fit a linear model per row of dat, then moderate the statistics with
# limma's empirical Bayes step.
fit <- eBayes(lmFit(dat, design))
# Create the Excel files that compare each group with the control group
# (logFC, p value and gene names). saveStatOutput is not defined in this
# script (presumably it comes from functions_ArrayAnalysis_v2.R -- TODO
# confirm).
files.c <- saveStatOutput(design, fit, annotation = anno)
# TODO: change the group names (cluster plot)
# TODO: add contrasts