Skip to content

Commit 181943a

Browse files
authored
Merge pull request eugenp#7503 from alimate/BAEL-3070
BAEL-3070: K-Means Clustering Code Samples
2 parents f34841b + 5e7396b commit 181943a

12 files changed

Lines changed: 4640 additions & 9 deletions

File tree

algorithms-miscellaneous-3/pom.xml

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2-
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
33
<modelVersion>4.0.0</modelVersion>
44
<artifactId>algorithms-miscellaneous-3</artifactId>
55
<version>0.0.1-SNAPSHOT</version>
@@ -18,17 +18,28 @@
1818
<version>${org.assertj.core.version}</version>
1919
<scope>test</scope>
2020
</dependency>
21-
21+
22+
<dependency>
23+
<groupId>org.apache.commons</groupId>
24+
<artifactId>commons-collections4</artifactId>
25+
<version>${commons-collections4.version}</version>
26+
</dependency>
27+
28+
<dependency>
29+
<groupId>com.google.guava</groupId>
30+
<artifactId>guava</artifactId>
31+
<version>${guava.version}</version>
32+
</dependency>
33+
2234
<dependency>
23-
<groupId>org.apache.commons</groupId>
24-
<artifactId>commons-collections4</artifactId>
25-
<version>${commons-collections4.version}</version>
35+
<groupId>com.squareup.retrofit2</groupId>
36+
<artifactId>retrofit</artifactId>
37+
<version>${retrofit.version}</version>
2638
</dependency>
27-
2839
<dependency>
29-
<groupId>com.google.guava</groupId>
30-
<artifactId>guava</artifactId>
31-
<version>${guava.version}</version>
40+
<groupId>com.squareup.retrofit2</groupId>
41+
<artifactId>converter-jackson</artifactId>
42+
<version>${retrofit.version}</version>
3243
</dependency>
3344

3445
<dependency>
@@ -61,5 +72,6 @@
6172
<org.assertj.core.version>3.9.0</org.assertj.core.version>
6273
<commons-collections4.version>4.3</commons-collections4.version>
6374
<guava.version>28.0-jre</guava.version>
75+
<retrofit.version>2.6.0</retrofit.version>
6476
</properties>
6577
</project>
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package com.baeldung.algorithms.kmeans;
2+
3+
import java.util.Map;
4+
import java.util.Objects;
5+
6+
/**
7+
* Encapsulates all coordinates for a particular cluster centroid.
8+
*/
9+
public class Centroid {
10+
11+
/**
12+
* The centroid coordinates.
13+
*/
14+
private final Map<String, Double> coordinates;
15+
16+
public Centroid(Map<String, Double> coordinates) {
17+
this.coordinates = coordinates;
18+
}
19+
20+
public Map<String, Double> getCoordinates() {
21+
return coordinates;
22+
}
23+
24+
@Override
25+
public boolean equals(Object o) {
26+
if (this == o) {
27+
return true;
28+
}
29+
if (o == null || getClass() != o.getClass()) {
30+
return false;
31+
}
32+
Centroid centroid = (Centroid) o;
33+
return Objects.equals(getCoordinates(), centroid.getCoordinates());
34+
}
35+
36+
@Override
37+
public int hashCode() {
38+
return Objects.hash(getCoordinates());
39+
}
40+
41+
@Override
42+
public String toString() {
43+
return "Centroid " + coordinates;
44+
}
45+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.baeldung.algorithms.kmeans;
2+
3+
import java.util.Map;
4+
5+
/**
6+
* Defines a contract to calculate distance between two feature vectors. The less the
7+
* calculated distance, the more two items are similar to each other.
8+
*/
9+
public interface Distance {
10+
11+
/**
12+
* Calculates the distance between two feature vectors.
13+
*
14+
* @param f1 The first set of features.
15+
* @param f2 The second set of features.
16+
* @return Calculated distance.
17+
* @throws IllegalArgumentException If the given feature vectors are invalid.
18+
*/
19+
double calculate(Map<String, Double> f1, Map<String, Double> f2);
20+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package com.baeldung.algorithms.kmeans;
2+
3+
import java.util.List;
4+
import java.util.Map;
5+
6+
/**
7+
* Encapsulates methods to calculates errors between centroid and the cluster members.
8+
*/
9+
public class Errors {
10+
11+
public static double sse(Map<Centroid, List<Record>> clustered, Distance distance) {
12+
double sum = 0;
13+
for (Map.Entry<Centroid, List<Record>> entry : clustered.entrySet()) {
14+
Centroid centroid = entry.getKey();
15+
for (Record record : entry.getValue()) {
16+
double d = distance.calculate(centroid.getCoordinates(), record.getFeatures());
17+
sum += Math.pow(d, 2);
18+
}
19+
}
20+
21+
return sum;
22+
}
23+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package com.baeldung.algorithms.kmeans;
2+
3+
import java.util.Map;
4+
5+
/**
6+
* Calculates the distance between two items using the Euclidean formula.
7+
*/
8+
public class EuclideanDistance implements Distance {
9+
10+
@Override
11+
public double calculate(Map<String, Double> f1, Map<String, Double> f2) {
12+
if (f1 == null || f2 == null) {
13+
throw new IllegalArgumentException("Feature vectors can't be null");
14+
}
15+
16+
double sum = 0;
17+
for (String key : f1.keySet()) {
18+
Double v1 = f1.get(key);
19+
Double v2 = f2.get(key);
20+
21+
if (v1 != null && v2 != null) sum += Math.pow(v1 - v2, 2);
22+
}
23+
24+
return Math.sqrt(sum);
25+
}
26+
}

0 commit comments

Comments
 (0)