-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathBEDExporter.java
More file actions
148 lines (129 loc) · 5.13 KB
/
BEDExporter.java
File metadata and controls
148 lines (129 loc) · 5.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
package org.seqcode.deepseq.utils;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import org.seqcode.deepseq.StrandedBaseCount;
import org.seqcode.deepseq.experiments.ControlledExperiment;
import org.seqcode.deepseq.experiments.ExperimentCondition;
import org.seqcode.deepseq.experiments.ExperimentManager;
import org.seqcode.deepseq.experiments.ExptConfig;
import org.seqcode.genome.GenomeConfig;
import org.seqcode.genome.location.NamedRegion;
import org.seqcode.genome.location.Region;
import org.seqcode.gsebricks.verbs.location.ChromRegionIterator;
import org.seqcode.gseutils.Args;
import org.seqcode.gseutils.NotFoundException;
public class BEDExporter {
protected GenomeConfig gconfig;
protected ExptConfig econfig;
protected ExperimentManager manager;
protected int [] stackedHitCountsPos;
protected int [] stackedHitCountsNeg;
final int MAXSECTION=50000000;
private int readLength=1;
private String outName="out";
public static void main(String[] args) throws SQLException, NotFoundException {
GenomeConfig gconfig = new GenomeConfig(args);
ExptConfig econfig = new ExptConfig(gconfig.getGenome(), args);
if(args.length==0 || gconfig.helpWanted()){
System.err.println("BEDExporter usage:\n" +
GenomeConfig.getArgsList()+"\n"+
ExptConfig.getArgsList()+"n"+
"BEDExporter:\n"+
"\t--readlen <read length>\n" +
"\t--out <output file name>");
System.exit(1);
}else{
String outName = Args.parseString(args,"out","out");
int readLen = Args.parseInteger(args,"readlen",40);
BEDExporter exporter = new BEDExporter(gconfig, econfig, outName, readLen);
exporter.execute();
exporter.close();
}
}
public BEDExporter(GenomeConfig gcon, ExptConfig econ, String out, int rL) {
gconfig = gcon;
econfig = econ;
manager = new ExperimentManager(econfig);
readLength = rL;
}
public void execute(){
for(ExperimentCondition c : manager.getConditions()){
for(ControlledExperiment rep : c.getReplicates()){
System.err.println("Condition "+c.getName()+":\tRep "+rep.getName());
try {
FileWriter fw = new FileWriter(outName+"."+c.getName()+"."+rep.getName()+".bed");
//fw.write("chrom\tindex\tforward\treverse\tvalue\n");
double basesDone=0, printStep=10000000, numPrint=0;
ChromRegionIterator chroms = new ChromRegionIterator(gconfig.getGenome());
while(chroms.hasNext()){
NamedRegion currentRegion = chroms.next();
//Split the job up into chunks of 100Mbp
for(int x=currentRegion.getStart(); x<=currentRegion.getEnd(); x+=100000000){
int y = x+100000000;
if(y>currentRegion.getEnd()){y=currentRegion.getEnd();}
Region currSubRegion = new Region(gconfig.getGenome(), currentRegion.getChrom(), x, y);
List<StrandedBaseCount> hits = rep.getSignal().getBases(currSubRegion);
double stackedHitCountsPos[] = make5PrimeLandscape(hits, currSubRegion, '+');
double stackedHitCountsNeg[] = make5PrimeLandscape(hits, currSubRegion, '-');
//Scan regions
for(int i=currSubRegion.getStart(); i<currSubRegion.getEnd(); i++){
int offset = i-currSubRegion.getStart();
double posHits=stackedHitCountsPos[offset];
double negHits=stackedHitCountsNeg[offset];
if(posHits>0 || negHits>0){
for(int hitc=0; hitc<(int)posHits; hitc++){
fw.write("chr"+currSubRegion.getChrom()+"\t"+i+"\t"+(i+readLength)+"\t"+"+"+"\n");
}
for(int hitc=0; hitc<(int)negHits; hitc++){
fw.write("chr"+currSubRegion.getChrom()+"\t"+(i+1-readLength)+"\t"+(i+1)+"\t"+"-"+"\n");
}
}
//Print out progress
basesDone++;
if(basesDone > numPrint*printStep){
if(numPrint%10==0){System.out.print(String.format("(%.0f)", (numPrint*printStep)));}
else{System.out.print(".");}
if(numPrint%50==0 && numPrint!=0){System.out.print("\n");}
numPrint++;
}
}
}
}
System.out.print("\n");
fw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
public void close(){
manager.close();
}
protected double[] make5PrimeLandscape(List<StrandedBaseCount> hits, Region currReg, char strand){
double[] startcounts = new double[(int)currReg.getWidth()+1];
for(int i=0; i<=currReg.getWidth(); i++){startcounts[i]=0;}
for(StrandedBaseCount r : hits){
if(strand=='.' || r.getStrand()==strand){
if(r.getCoordinate()>=currReg.getStart() && r.getCoordinate()<=currReg.getEnd()){
int offset5=inBounds(r.getCoordinate()-currReg.getStart(),0,currReg.getWidth());
startcounts[offset5]+=r.getCount();
}
}
}
return(startcounts);
}
//keep the number in bounds
protected final double inBounds(double x, double min, double max){
if(x<min){return min;}
if(x>max){return max;}
return x;
}
protected final int inBounds(int x, int min, int max){
if(x<min){return min;}
if(x>max){return max;}
return x;
}
}