-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMathUtil.java
More file actions
403 lines (335 loc) · 14 KB
/
MathUtil.java
File metadata and controls
403 lines (335 loc) · 14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.util;
import java.math.BigDecimal;
import java.util.Arrays;
import static java.lang.Math.log1p;
import static java.lang.Math.pow;
/**
* General math utilities
*
* @author Tim Fennell
*/
final public class MathUtil {
private MathUtil(){};
/** The double value closest to 1 while still being less than 1. */
public static final double MAX_PROB_BELOW_ONE = 0.9999999999999999d;
/**
* this function mimics the behavior of log_1p but resulting in log _base 10_ of (1+x) instead of natural log of 1+x
*/
public static double log10_1p(final double x){
return log1p(x)/LOG_10_MATH.log_of_base;
}
/** Calculated the mean of an array of doubles. */
public static double mean(final double[] in, final int start, final int stop) {
double total = 0;
for (int i = start; i < stop; ++i) {
total += in[i];
}
return total / (stop - start);
}
/** Calculated the standard deviation of an array of doubles. */
public static double stddev(final double[] in, final int start, final int length) {
return stddev(in, start, length, mean(in, start, length));
}
/** Calculated the standard deviation of an array of doubles. */
public static double stddev(final double[] in, final int start, final int stop, final double mean) {
double total = 0;
for (int i = start; i < stop; ++i) {
total += (in[i] * in[i]);
}
return Math.sqrt((total / (stop - start)) - (mean * mean));
}
public static int compare(final int v1, final int v2) {
return (v1 < v2 ? -1 : (v1 == v2 ? 0 : 1));
}
/** Calculate the median of an array of doubles. Assumes that the input is sorted */
public static double median(final double... in) {
if (in.length == 0) {
throw new IllegalArgumentException("Attempting to find the median of an empty array");
}
final double[] data = Arrays.copyOf(in, in.length);
Arrays.sort(data);
final int middle = data.length / 2;
return data.length % 2 == 1 ? data[middle] : (data[middle - 1] + data[middle]) / 2.0;
}
/**
* Obtains percentage of two Longs
* @param numerator dividend
* @param denominator divisor
* @return numerator/(double)denominator if both are non-null and denominator != 0, else returns null.
*/
public static Double percentageOrNull(final Long numerator, final Long denominator) {
if (numerator != null && denominator != null && denominator != 0) {
return numerator.doubleValue() / denominator.doubleValue();
} else {
return null;
}
}
/**
* Round off the value to the specified precision.
*/
public static double round(final double num, final int precision) {
BigDecimal bd = new BigDecimal(num);
bd = bd.setScale(precision, BigDecimal.ROUND_HALF_UP);
return bd.doubleValue();
}
/** Returns the largest value stored in the array. */
public static double max(final double[] nums) {
return nums[indexOfMax(nums)];
}
/**
* Returns the index of the largest element in the array. If there are multiple equal maxima then
* the earliest one in the array is returned.
*/
public static int indexOfMax(final double[] nums) {
double max = nums[0];
int index = 0;
for (int i = 1; i < nums.length; ++i) {
if (nums[i] > max) {
max = nums[i];
index = i;
}
}
return index;
}
/** Returns the largest value stored in the array. */
public static long max(final long[] nums) {
return nums[indexOfMax(nums)];
}
/**
* Returns the index of the largest element in the array. If there are multiple equal maxima then
* the earliest one in the array is returned.
*/
public static int indexOfMax(final long[] nums) {
double max = nums[0];
int index = 0;
for (int i = 1; i < nums.length; ++i) {
if (nums[i] > max) {
max = nums[i];
index = i;
}
}
return index;
}
/** Returns the smallest value stored in the array. */
public static double min(final double[] nums) {
double min = nums[0];
for (int i = 1; i < nums.length; ++i) {
if (nums[i] < min) min = nums[i];
}
return min;
}
/** Returns the smallest value stored in the array. */
public static int min(final int[] nums) {
int min = nums[0];
for (int i = 1; i < nums.length; ++i) {
if (nums[i] < min) min = nums[i];
}
return min;
}
/** Returns the smallest value stored in the array. */
public static short min(final short[] nums) {
short min = nums[0];
for (int i = 1; i < nums.length; ++i) {
if (nums[i] < min) min = nums[i];
}
return min;
}
/** Returns the smallest value stored in the array. */
public static byte min(final byte[] nums) {
byte min = nums[0];
for (int i = 1; i < nums.length; ++i) {
if (nums[i] < min) min = nums[i];
}
return min;
}
/** Mimic's R's seq() function to produce a sequence of equally spaced numbers. */
public static double[] seq(final double from, final double to, final double by) {
if (from < to && by <= 0) return new double[0];
if (from > to && by >= 0) return new double[0];
final int values = 1 + (int) Math.floor((to - from) / by);
final double[] results = new double[values];
BigDecimal value = new BigDecimal(from);
BigDecimal increment = new BigDecimal(by);
for (int i=0; i<values; ++i) {
results[i] = value.doubleValue();
value = value.add(increment);
}
return results;
}
/** "Promotes" an int[] into a double array with the same values (or as close as precision allows). */
public static double[] promote(final int[] is) {
final double[] ds = new double[is.length];
for (int i = 0; i < is.length; ++i) ds[i] = is[i];
return ds;
}
@Deprecated // use pNormalizeLogProbability instead (renamed)
public static double[] logLikelihoodsToProbs(final double[] likelihoods) {
return pNormalizeLogProbability(likelihoods);
}
/**
* Takes a complete set of mutually exclusive logPosteriors and converts them to probabilities
* that sum to 1 with as much fidelity as possible. Limits probabilities to be in the space:
* 0.9999999999999999 >= p >= (1-0.9999999999999999)/(lPosteriors.length-1)
*/
public static double[] pNormalizeLogProbability(final double[] lPosterior) {
// Note: bumping all the LLs so that the biggest is 300 ensures that we have the
// widest range possible when unlogging them before one of them underflows. 10^300 is
// near the maximum before you hit positive infinity.
final double maxLikelihood = max(lPosterior);
final double bump = 300 - maxLikelihood;
final double[] tmp = new double[lPosterior.length];
double total = 0;
for (int i = 0; i < lPosterior.length; ++i) {
tmp[i] = pow(10, lPosterior[i] + bump);
total += tmp[i];
}
final double maxP = MAX_PROB_BELOW_ONE;
final double minP = (1 - MAX_PROB_BELOW_ONE) / (tmp.length - 1);
for (int i = 0; i < lPosterior.length; ++i) {
tmp[i] /= total;
if (tmp[i] > maxP) tmp[i] = maxP;
else if (tmp[i] < minP) tmp[i] = minP;
}
return tmp;
}
/** Calculates the ratio of two arrays of the same length. */
public static double[] divide(final double[] numerators, final double[] denominators) {
if (numerators.length != denominators.length) throw new IllegalArgumentException("Arrays must be of same length.");
final int len = numerators.length;
final double[] result = new double[len];
for (int i = 0; i < len; ++i) result[i] = numerators[i] / denominators[i];
return result;
}
/**
* Takes a vector of numbers and converts it to a vector of probabilities
* that sum to 1 with as much fidelity as possible. Limits probabilities to be in the space:
* 0.9999999999999999 >= p >= (1-0.9999999999999999)/(likelihoods.length-1)
*/
public static double[] pNormalizeVector(final double[] pPosterior) {
final double[] tmp = new double[pPosterior.length];
final double total = sum(pPosterior);
final double maxP = MAX_PROB_BELOW_ONE;
final double minP = (1 - MAX_PROB_BELOW_ONE) / (tmp.length - 1);
for (int i = 0; i < pPosterior.length; ++i) {
tmp[i] = pPosterior[i] / total;
if (tmp[i] > maxP) tmp[i] = maxP;
else if (tmp[i] < minP) tmp[i] = minP;
}
return tmp;
}
/** Calculates the product of two arrays of the same length. */
public static double[] multiply(final double[] lhs, final double[] rhs) {
if (lhs.length != rhs.length) throw new IllegalArgumentException("Arrays must be of same length.");
final int len = lhs.length;
final double[] result = new double[len];
for (int i = 0; i < len; ++i) result[i] = lhs[i] * rhs[i];
return result;
}
/** calculates the sum of the arrays as a third array. */
public static double[] sum(final double[] lhs, final double[] rhs) {
if (lhs.length != rhs.length) throw new IllegalArgumentException("Arrays must be of same length.");
final int len = lhs.length;
final double [] result = new double[len];
for (int i = 0; i < len; ++i) result[i] = lhs[i] + rhs[i];
return result;
}
/** Returns the sum of the elements in the array. */
public static double sum(final double[] arr) {
double result = 0;
for (final double next : arr) result += next;
return result;
}
/** Returns the sum of the elements in the array starting with start and ending before stop. */
public static long sum(final long[] arr, final int start, final int stop) {
long result = 0;
for (int i=start; i<stop; ++i) {
result += arr[i];
}
return result;
}
public static final LogMath LOG_2_MATH = new LogMath(2);
public static final LogMath NATURAL_LOG_MATH = new LogMath(Math.exp(1)) {
@Override
public double getLogValue(final double nonLogValue) {
return Math.log(nonLogValue);
}
};
public static final LogMath LOG_10_MATH = new LogMath(10) {
@Override
public double getLogValue(final double nonLogValue) {
return Math.log10(nonLogValue);
}
};
/**
* A collection of common math operations that work with log values. To use it, pass values from log space, the operation will be
* computed in non-log space, and a value in log space will be returned.
*/
public static class LogMath {
private final double base;
private final double log_of_base;
public double getLog_of_base() {
return log_of_base;
}
private LogMath(final double base) {
this.base = base;
this.log_of_base=Math.log(base);
}
/** Returns the decimal representation of the provided log values. */
public double getNonLogValue(final double logValue) {
return Math.pow(base, logValue);
}
/** Returns the log-representation of the provided decimal value. */
public double getLogValue(final double nonLogValue) {
return Math.log(nonLogValue) / log_of_base;
}
/** Returns the log-representation of the provided decimal array. */
public double[] getLogValue(final double[] nonLogArray) {
final double[] logArray = new double[nonLogArray.length];
for (int i = 0; i < nonLogArray.length; i++) {
logArray[i] = getLogValue(nonLogArray[i]);
}
return logArray;
}
/** Computes the mean of the provided log values. */
public double mean(final double... logValues) {
return sum(logValues) - getLogValue(logValues.length);
}
/** Computes the sum of the provided log values. */
public double sum(final double... logValues) {
// Avoid overflow via scaling.
final double scalingFactor = max(logValues);
double simpleAdditionResult = 0;
for (final double v : logValues) {
simpleAdditionResult += getNonLogValue(v - scalingFactor);
}
return getLogValue(simpleAdditionResult) + scalingFactor;
}
/** Computes the sum of the provided log values. */
public double product(final double... logValues) {
return MathUtil.sum(logValues);
}
}
}