-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathGeneORFs.java
More file actions
115 lines (105 loc) · 4.07 KB
/
GeneORFs.java
File metadata and controls
115 lines (105 loc) · 4.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package org.seqcode.tools.sequence;
import java.util.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import org.seqcode.data.io.FASTAWriter;
import org.seqcode.genome.Genome;
import org.seqcode.genome.location.Gene;
import org.seqcode.genome.location.NamedStrandedRegion;
import org.seqcode.genome.location.Region;
import org.seqcode.gsebricks.verbs.location.ChromRegionIterator;
import org.seqcode.gsebricks.verbs.location.RefGeneGenerator;
import org.seqcode.gseutils.Args;
import org.seqcode.gseutils.NotFoundException;
/**
 * Command-line tool that reports the regions of gene ORFs.
 *
 * <p>Genes are either looked up by name (names read from standard input, one
 * query per line) or, with {@code --allgenes}, enumerated chromosome by
 * chromosome from every gene annotation source given with {@code --genes}.
 *
 * <p>By default each gene is printed as a tab-separated line of the gene's
 * {@code toString()} and {@code chrom:start-end:strand}. With {@code --fasta}
 * each gene is instead handed to a {@link FASTAWriter} on standard output
 * (presumably emitting the ORF sequence in FASTA format).
 *
 * <pre>
 * cat gene_names.txt | java GeneORFs --species "$SC;Sigmav7" --genes s288cMapped
 * cat gene_names.txt | java GeneORFs --species "$SC;Sigmav7" --genes s288cMapped --fasta
 * java GeneORFs --species "$SC;Sigmav7" --genes s288cMapped --fasta --allgenes
 * </pre>
 */
public class GeneORFs {

    /** Gene annotation sources from the command line; queried in the order given. */
    private List<RefGeneGenerator> geneGenerators;
    /** Genome whose chromosomes are scanned in {@code --allgenes} mode. */
    private Genome genome;
    /** {@code allGenes}: enumerate every gene instead of reading names; {@code toFasta}: write via {@link #fwriter}. */
    private boolean allGenes, toFasta;
    /** Only created when {@code --fasta} is given; writes to {@code System.out}. */
    private FASTAWriter<NamedStrandedRegion> fwriter;

    /**
     * Entry point: parses the command line and runs the tool.
     *
     * @param args command-line arguments (see the class description)
     * @throws Exception if argument parsing or gene retrieval fails
     */
    public static void main(String args[]) throws Exception {
        GeneORFs gp = new GeneORFs();
        gp.parseArgs(args);
        gp.run();
    }

    /** Creates an unconfigured instance; call {@link #parseArgs(String[])} before {@link #run()}. */
    public GeneORFs() {}

    /**
     * Configures this instance from the command line: gene annotation sources,
     * genome, and the {@code allgenes} / {@code fasta} flags.
     *
     * @param args command-line arguments
     * @throws NotFoundException propagated from the {@link Args} parsing helpers
     */
    public void parseArgs(String args[]) throws NotFoundException {
        geneGenerators = Args.parseGenes(args);
        for (RefGeneGenerator r : geneGenerators) {
            // Presumably disables exon loading: only the gene-level region is used here.
            r.retrieveExons(false);
        }
        genome = Args.parseGenome(args).getLast();

        // Parse the flags once and reuse the result for both options.
        Collection<String> flags = Args.parseFlags(args);
        allGenes = flags.contains("allgenes");
        toFasta = flags.contains("fasta");
        if (toFasta) {
            fwriter = new FASTAWriter<NamedStrandedRegion>(System.out);
        }
    }

    /**
     * Outputs either every gene in the genome ({@code --allgenes}) or the
     * genes named on standard input.
     *
     * @throws IOException if reading standard input fails
     */
    public void run() throws IOException {
        if (allGenes) {
            outputAllGenes();
        } else {
            outputGenesFromStdin();
        }
    }

    /**
     * Outputs every gene that each gene generator reports on each chromosome of the genome.
     *
     * <p>NOTE: no de-duplication is performed. A region reported by more than
     * one generator is output once per generator.
     */
    private void outputAllGenes() throws IOException {
        ChromRegionIterator chroms = new ChromRegionIterator(genome);
        while (chroms.hasNext()) {
            Region chrom = chroms.next();
            for (RefGeneGenerator refgene : geneGenerators) {
                Iterator<Gene> genes = refgene.execute(chrom);
                while (genes.hasNext()) {
                    output(genes.next());
                }
            }
        }
    }

    /**
     * Reads tab-separated candidate names from standard input, one query per
     * line, and outputs the first gene matching any name on the line. Lines
     * with no match are reported on standard error; blank lines are skipped.
     */
    private void outputGenesFromStdin() throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.trim().isEmpty()) {
                // Nothing to look up (e.g. a trailing blank line in the name list).
                continue;
            }
            Gene g = findGene(line.split("\\t"));
            if (g != null) {
                // Label the gene with the full input line so the output can be
                // matched back to the query that produced it.
                g.setName(line);
                output(g);
            } else {
                System.err.println("Couldn't find " + line);
            }
        }
    }

    /**
     * Returns the first gene found for the given candidate names, trying the
     * names in order and, for each name, the gene generators in order.
     *
     * @param names candidate names for a single gene
     * @return the first matching gene, or {@code null} if no name matches
     */
    private Gene findGene(String[] names) throws IOException {
        for (String name : names) {
            for (RefGeneGenerator refgene : geneGenerators) {
                Gene found = null;
                Iterator<Gene> matches = refgene.byName(name);
                // Always exhaust the iterator, keeping only the first hit
                // (presumably so the generator can release its resources).
                while (matches.hasNext()) {
                    Gene candidate = matches.next();
                    if (found == null) {
                        found = candidate;
                    }
                }
                if (found != null) {
                    return found;
                }
            }
        }
        return null;
    }

    /**
     * Writes one region: through the FASTA writer when {@code --fasta} was
     * given, otherwise as {@code <toString()>\t<chrom>:<start>-<end>:<strand>}
     * on standard output.
     *
     * @param r region to output
     */
    public void output(NamedStrandedRegion r) {
        if (toFasta) {
            fwriter.consume(r);
        } else {
            System.out.println(String.format("%s\t%s:%d-%d:%s",
                    r.toString(),
                    r.getChrom(), r.getStart(), r.getEnd(), r.getStrand()));
        }
    }
}