forked from pozorvlak/microarray
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmicroarray.pl
More file actions
62 lines (46 loc) · 1.69 KB
/
microarray.pl
File metadata and controls
62 lines (46 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/perl -w
use strict;
use List::MoreUtils qw/any/;
use List::Util qw/sum/;
use Scalar::Util qw/looks_like_number/;
use Statistics::Lite qw/mean stddev/;
use feature 'say';
our $VERSION = '0.001';
######################
#Microarray Filter and Fold Change Finder
######################
# Print the banner, then load the single data file named on the command line
# into @genes: one { name => ..., values => [...] } record per retained row.
say "\nMicroarray Filter and Analysis Tool:";
if (@ARGV != 1) {
    die("\nUse: perl microarray.pl <Input datafile.txt>");
}

my @genes;
LINE:
while (my $line = <>) {
    chomp $line;

    # The header row starts with "probes" and carries no readings.
    next LINE if $line =~ /^probes/;

    # First whitespace-separated field is the gene name, the rest are readings.
    my ($name, @values) = split ' ', $line;

    # Any reading that is not a number makes the whole file unusable.
    for my $value (@values) {
        next if looks_like_number($value);
        die "File '$ARGV' contains non-numeric data at line $.";
    }

    # Rows in which no reading exceeds 300 are filtered out.
    my $aboveThreshold = grep { $_ > 300 } @values;
    next LINE if !$aboveThreshold;

    push @genes, { name => $name, values => \@values };
}

my $keptCount = @genes;
say "\nThere are $keptCount genes that meet filter criteria.\n";
# Score every retained gene and print the gene names ordered by that score.
#
# The score of a gene is
#     (mean(control) - mean(sample)) / (stddev(control) + stddev(sample))
# where the control group is the first $controlCount readings of the row and
# the sample group is the $sampleCount readings that follow it.
#
# Scores are collected as a list of { name, score } records rather than in a
# hash keyed by score: with such a hash, two genes that share a score
# overwrite each other and one of them silently disappears from the ranking.
my $controlCount  = 20;    # readings in the control group (columns 0 .. 19)
my $sampleCount   = 21;    # readings in the sample group (columns 20 .. 40)
my $requiredCount = $controlCount + $sampleCount;

my @scored;
for my $gene (@genes) {
    my $data = $gene->{values};

    # A short row would put undef into the slices below and skew the
    # statistics, so reject it like any other malformed input.
    die "Gene '$gene->{name}' has " . scalar(@$data)
        . " values; at least $requiredCount are required"
        if @$data < $requiredCount;

    my @control = @$data[ 0 .. $controlCount - 1 ];
    my @sample  = @$data[ $controlCount .. $requiredCount - 1 ];

    my $fldNum   = mean(@control) - mean(@sample);
    my $fldDenom = stddev(@control) + stddev(@sample);

    # With no spread in either group the score is undefined (division by
    # zero); report the gene and leave it out instead of aborting the run.
    if ($fldDenom == 0) {
        warn "Skipping gene '$gene->{name}': "
            . "no variation in either group, FLD score is undefined\n";
        next;
    }

    my $fldScore = $fldNum / $fldDenom;
    push @scored, { name => $gene->{name}, score => $fldScore };
    say "FLD score: $fldScore";

    # Progress marker after every 100 scored genes.
    say "Current cycle: ", scalar @scored
        if @scored % 100 == 0;
}

say "Top Ranking Differentially Expressed Genes:";

# Scores must be compared with <=>; the default string comparison would put
# "10.2" before "9.1" and "-0.5" before "-1.2". Ties fall back to the gene
# name so the listing is deterministic.
# NOTE(review): ascending order is kept from the original listing; confirm
# whether the highest (or largest absolute) score should come first.
my @ranking = sort {
    $a->{score} <=> $b->{score}
        or $a->{name} cmp $b->{name}
} @scored;

my $scoreCounter = 1;
for my $entry (@ranking) {
    say "$scoreCounter. $entry->{name}";
    $scoreCounter++;
}