-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathDumpGeneGFF.java
More file actions
87 lines (75 loc) · 3.18 KB
/
DumpGeneGFF.java
File metadata and controls
87 lines (75 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package org.seqcode.tools.location;
import java.util.*;
import org.seqcode.genome.Genome;
import org.seqcode.genome.location.ExonicGene;
import org.seqcode.genome.location.Gene;
import org.seqcode.genome.location.Region;
import org.seqcode.gsebricks.verbs.location.RefGeneGenerator;
import org.seqcode.gseutils.Args;
/**
 * Dumps gene annotations in GFF format to standard output.
 *
 * <p>Transcripts produced by the first gene source named on the command line
 * are grouped by a display name (see {@link #displayName(Gene)}). For every
 * group one <code>gene</code> record is written, followed by one
 * <code>mRNA</code> record per transcript and, for transcripts that are
 * {@link ExonicGene}s, one <code>exon</code> record per exon. Records are
 * tab-separated with nine columns; the source column is always
 * <code>protein_coding</code>, score and phase are always <code>.</code>, and
 * the attribute column carries <code>ID=</code> / <code>Parent=</code> links.
 *
 * <p>Chromosome names are written with a <code>chr</code> prefix, which is
 * added when the name does not already start with it.
 */
public class DumpGeneGFF {

    /** Prefix every chromosome name carries in the emitted records. */
    private static final String CHROM_PREFIX = "chr";

    /**
     * Entry point: loads all genes from the first configured gene source and
     * prints them as GFF records.
     *
     * @param args command-line arguments, handed to {@link Args#parseGenome}
     *             and {@link Args#parseGenes}
     * @throws Exception whatever argument parsing or gene retrieval throws
     */
    public static void main(String args[]) throws Exception {
        // The parsed genome itself is not needed below. The call is retained so
        // that genome-argument parsing still happens exactly as before.
        // NOTE(review): drop it if parseGenes is confirmed to cover this.
        Args.parseGenome(args).cdr();

        RefGeneGenerator genegen = Args.parseGenes(args).get(0);
        genegen.retrieveExons(true);
        genegen.setWantAlias(true);

        Map<String,Collection<Gene>> genes = groupByName(genegen.getAll());
        for (Map.Entry<String,Collection<Gene>> entry : genes.entrySet()) {
            printGene(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Collects transcripts into groups keyed by their display name.
     *
     * @param all iterator over every transcript to export
     * @return groups in order of first appearance, so that repeated runs over
     *         the same iterator order print records in the same order
     */
    private static Map<String,Collection<Gene>> groupByName(Iterator<Gene> all) {
        // LinkedHashMap rather than HashMap: output order follows the input
        // instead of depending on string hash codes.
        Map<String,Collection<Gene>> genes = new LinkedHashMap<String,Collection<Gene>>();
        while (all.hasNext()) {
            Gene g = all.next();
            String n = displayName(g);
            Collection<Gene> group = genes.get(n);
            if (group == null) {
                group = new ArrayList<Gene>();
                genes.put(n, group);
            }
            group.add(g);
        }
        return genes;
    }

    /**
     * Picks the name under which a transcript is grouped.
     *
     * @param g transcript to name
     * @return {@code g.getName()}, unless that equals {@code g.getID()} and the
     *         transcript has at least one non-ID name, in which case the first
     *         non-ID name is returned
     */
    private static String displayName(Gene g) {
        String n = g.getName();
        if (n.equals(g.getID())) {
            Collection<String> notid = g.getNonIDNames();
            if (!notid.isEmpty()) {
                n = notid.iterator().next();
            }
        }
        return n;
    }

    /**
     * Returns the chromosome name with the <code>chr</code> prefix, adding it
     * only when it is missing.
     *
     * @param chrom chromosome name as reported by a gene
     * @return {@code chrom} itself if it already starts with <code>chr</code>,
     *         otherwise <code>"chr" + chrom</code>
     */
    private static String withChromPrefix(String chrom) {
        // The previous test, !chrom.matches("^.*"), could never be true for an
        // ordinary name, so the prefix was silently never applied.
        if (chrom.startsWith(CHROM_PREFIX)) {
            return chrom;
        }
        return CHROM_PREFIX + chrom;
    }

    /**
     * Prints the gene record for one group followed by the mRNA and exon
     * records of each of its transcripts.
     *
     * @param id          display name of the group; used as the gene ID and as
     *                    the Parent of every mRNA record
     * @param transcripts transcripts in the group; expected to be non-empty
     */
    private static void printGene(String id, Collection<Gene> transcripts) {
        String chrom = null;
        int minpos = Integer.MAX_VALUE, maxpos = 0;
        char strand = '+';
        boolean mixedchroms = false;

        // Chromosome and strand come from the first transcript; the span is the
        // envelope of all transcripts in the group.
        for (Gene g : transcripts) {
            if (chrom == null) {
                chrom = g.getChrom();
                strand = g.getStrand();
            } else if (!g.getChrom().equals(chrom)) {
                mixedchroms = true;
            }
            minpos = Math.min(minpos, g.getStart());
            maxpos = Math.max(maxpos, g.getEnd());
        }

        if (mixedchroms) {
            // Reported on stderr so the GFF stream on stdout is unaffected.
            System.err.println(String.format(
                    "WARNING: transcripts named %s lie on more than one chromosome; "
                    + "the gene record uses %s and spans all of them",
                    id, withChromPrefix(chrom)));
        }

        System.out.println(String.format("%s\tprotein_coding\tgene\t%d\t%d\t.\t%s\t.\tID=%s",
                withChromPrefix(chrom), minpos, maxpos, Character.toString(strand), id));

        for (Gene g : transcripts) {
            // mRNA and exon records use the transcript's own chromosome, with
            // the same prefix rule as the gene record, so all three agree.
            String transcriptChrom = withChromPrefix(g.getChrom());
            String transcriptStrand = Character.toString(g.getStrand());

            System.out.println(String.format("%s\tprotein_coding\tmRNA\t%d\t%d\t.\t%s\t.\tID=%s;Parent=%s",
                    transcriptChrom, g.getStart(), g.getEnd(), transcriptStrand, g.getID(), id));

            if (g instanceof ExonicGene) {
                ExonicGene exonic = (ExonicGene)g;
                Iterator<Region> iter = exonic.getExons();
                // Exon IDs are <transcript ID>.<ordinal>, counted from 1 in
                // the order the iterator yields them.
                int count = 1;
                while (iter.hasNext()) {
                    Region e = iter.next();
                    System.out.println(String.format("%s\tprotein_coding\texon\t%d\t%d\t.\t%s\t.\tID=%s.%d;Parent=%s",
                            transcriptChrom, e.getStart(), e.getEnd(), transcriptStrand, g.getID(), count, g.getID()));
                    count++;
                }
            }
        }
    }
}