Skip to content

Commit 17567e3

Browse files
author
linyiqun
committed
BIRCH算法运用CF聚类特征树实现聚类分类
BIRCH算法运用CF聚类特征树实现聚类分类
1 parent 6168494 commit 17567e3

8 files changed

Lines changed: 1145 additions & 0 deletions

File tree

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
package DataMining_BIRCH;
2+
3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileReader;
6+
import java.io.IOException;
7+
import java.text.MessageFormat;
8+
import java.util.ArrayList;
9+
import java.util.LinkedList;
10+
11+
/**
12+
* BIRCH聚类算法工具类
13+
*
14+
* @author lyq
15+
*
16+
*/
17+
public class BIRCHTool {
18+
// 节点类型名称
19+
public static final String NON_LEAFNODE = "【NonLeafNode】";
20+
public static final String LEAFNODE = "【LeafNode】";
21+
public static final String CLUSTER = "【Cluster】";
22+
23+
// 测试数据文件地址
24+
private String filePath;
25+
// 内部节点平衡因子B
26+
public static int B;
27+
// 叶子节点平衡因子L
28+
public static int L;
29+
// 簇直径阈值T
30+
public static double T;
31+
// 总的测试数据记录
32+
private ArrayList<String[]> totalDataRecords;
33+
34+
public BIRCHTool(String filePath, int B, int L, double T) {
35+
this.filePath = filePath;
36+
this.B = B;
37+
this.L = L;
38+
this.T = T;
39+
readDataFile();
40+
}
41+
42+
/**
43+
* 从文件中读取数据
44+
*/
45+
private void readDataFile() {
46+
File file = new File(filePath);
47+
ArrayList<String[]> dataArray = new ArrayList<String[]>();
48+
49+
try {
50+
BufferedReader in = new BufferedReader(new FileReader(file));
51+
String str;
52+
String[] tempArray;
53+
while ((str = in.readLine()) != null) {
54+
tempArray = str.split(" ");
55+
dataArray.add(tempArray);
56+
}
57+
in.close();
58+
} catch (IOException e) {
59+
e.getStackTrace();
60+
}
61+
62+
totalDataRecords = new ArrayList<>();
63+
for (String[] array : dataArray) {
64+
totalDataRecords.add(array);
65+
}
66+
}
67+
68+
/**
69+
* 构建CF聚类特征树
70+
*
71+
* @return
72+
*/
73+
private ClusteringFeature buildCFTree() {
74+
NonLeafNode rootNode = null;
75+
LeafNode leafNode = null;
76+
Cluster cluster = null;
77+
78+
for (String[] record : totalDataRecords) {
79+
cluster = new Cluster(record);
80+
81+
if (rootNode == null) {
82+
// CF树只有1个节点的时候的情况
83+
if (leafNode == null) {
84+
leafNode = new LeafNode();
85+
}
86+
leafNode.addingCluster(cluster);
87+
if (leafNode.getParentNode() != null) {
88+
rootNode = leafNode.getParentNode();
89+
}
90+
} else {
91+
if (rootNode.getParentNode() != null) {
92+
rootNode = rootNode.getParentNode();
93+
}
94+
95+
// 从根节点开始,从上往下寻找到最近的添加目标叶子节点
96+
LeafNode temp = rootNode.findedClosestNode(cluster);
97+
temp.addingCluster(cluster);
98+
}
99+
}
100+
101+
// 从下往上找出最上面的节点
102+
LeafNode node = cluster.getParentNode();
103+
NonLeafNode upNode = node.getParentNode();
104+
if (upNode == null) {
105+
return node;
106+
} else {
107+
while (upNode.getParentNode() != null) {
108+
upNode = upNode.getParentNode();
109+
}
110+
111+
return upNode;
112+
}
113+
}
114+
115+
/**
116+
* 开始构建CF聚类特征树
117+
*/
118+
public void startBuilding() {
119+
// 树深度
120+
int level = 1;
121+
ClusteringFeature rootNode = buildCFTree();
122+
123+
setTreeLevel(rootNode, level);
124+
showCFTree(rootNode);
125+
}
126+
127+
/**
128+
* 设置节点深度
129+
*
130+
* @param clusteringFeature
131+
* 当前节点
132+
* @param level
133+
* 当前深度值
134+
*/
135+
private void setTreeLevel(ClusteringFeature clusteringFeature, int level) {
136+
LeafNode leafNode = null;
137+
NonLeafNode nonLeafNode = null;
138+
139+
if (clusteringFeature instanceof LeafNode) {
140+
leafNode = (LeafNode) clusteringFeature;
141+
} else if (clusteringFeature instanceof NonLeafNode) {
142+
nonLeafNode = (NonLeafNode) clusteringFeature;
143+
}
144+
145+
if (nonLeafNode != null) {
146+
nonLeafNode.setLevel(level);
147+
level++;
148+
// 设置子节点
149+
if (nonLeafNode.getNonLeafChilds() != null) {
150+
for (NonLeafNode n1 : nonLeafNode.getNonLeafChilds()) {
151+
setTreeLevel(n1, level);
152+
}
153+
} else {
154+
for (LeafNode n2 : nonLeafNode.getLeafChilds()) {
155+
setTreeLevel(n2, level);
156+
}
157+
}
158+
} else {
159+
leafNode.setLevel(level);
160+
level++;
161+
// 设置子聚簇
162+
for (Cluster c : leafNode.getClusterChilds()) {
163+
c.setLevel(level);
164+
}
165+
}
166+
}
167+
168+
/**
169+
* 显示CF聚类特征树
170+
*
171+
* @param rootNode
172+
* CF树根节点
173+
*/
174+
private void showCFTree(ClusteringFeature rootNode) {
175+
// 空格数,用于输出
176+
int blankNum = 5;
177+
// 当前树深度
178+
int currentLevel = 1;
179+
LinkedList<ClusteringFeature> nodeQueue = new LinkedList<>();
180+
ClusteringFeature cf;
181+
LeafNode leafNode;
182+
NonLeafNode nonLeafNode;
183+
ArrayList<Cluster> clusterList = new ArrayList<>();
184+
String typeName;
185+
186+
nodeQueue.add(rootNode);
187+
while (nodeQueue.size() > 0) {
188+
cf = nodeQueue.poll();
189+
190+
if (cf instanceof LeafNode) {
191+
leafNode = (LeafNode) cf;
192+
typeName = LEAFNODE;
193+
194+
if (leafNode.getClusterChilds() != null) {
195+
for (Cluster c : leafNode.getClusterChilds()) {
196+
nodeQueue.add(c);
197+
}
198+
}
199+
} else if (cf instanceof NonLeafNode) {
200+
nonLeafNode = (NonLeafNode) cf;
201+
typeName = NON_LEAFNODE;
202+
203+
if (nonLeafNode.getNonLeafChilds() != null) {
204+
for (NonLeafNode n1 : nonLeafNode.getNonLeafChilds()) {
205+
nodeQueue.add(n1);
206+
}
207+
} else {
208+
for (LeafNode n2 : nonLeafNode.getLeafChilds()) {
209+
nodeQueue.add(n2);
210+
}
211+
}
212+
} else {
213+
clusterList.add((Cluster)cf);
214+
typeName = CLUSTER;
215+
}
216+
217+
if (currentLevel != cf.getLevel()) {
218+
currentLevel = cf.getLevel();
219+
System.out.println();
220+
System.out.println("|");
221+
System.out.println("|");
222+
}else if(currentLevel == cf.getLevel() && currentLevel != 1){
223+
for (int i = 0; i < blankNum; i++) {
224+
System.out.print("-");
225+
}
226+
}
227+
228+
System.out.print(typeName);
229+
System.out.print("N:" + cf.getN() + ", LS:");
230+
System.out.print("[");
231+
for (double d : cf.getLS()) {
232+
System.out.print(MessageFormat.format("{0}, ", d));
233+
}
234+
System.out.print("]");
235+
}
236+
237+
System.out.println();
238+
System.out.println("*******最终分好的聚簇****");
239+
//显示已经分好类的聚簇点
240+
for(int i=0; i<clusterList.size(); i++){
241+
System.out.println("Cluster" + (i+1) + ":");
242+
for(double[] point: clusterList.get(i).getData()){
243+
System.out.print("[");
244+
for (double d : point) {
245+
System.out.print(MessageFormat.format("{0}, ", d));
246+
}
247+
System.out.println("]");
248+
}
249+
}
250+
}
251+
252+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package DataMining_BIRCH;
2+
3+
/**
4+
* BIRCH聚类算法调用类
5+
* @author lyq
6+
*
7+
*/
8+
public class Client {
9+
public static void main(String[] args){
10+
String filePath = "C:\\Users\\lyq\\Desktop\\icon\\testInput.txt";
11+
//内部节点平衡因子B
12+
int B = 2;
13+
//叶子节点平衡因子L
14+
int L = 2;
15+
//簇直径阈值T
16+
double T = 0.6;
17+
18+
BIRCHTool tool = new BIRCHTool(filePath, B, L, T);
19+
tool.startBuilding();
20+
}
21+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package DataMining_BIRCH;
2+
3+
import java.util.ArrayList;
4+
5+
/**
6+
* 叶子节点中的小集群
7+
* @author lyq
8+
*
9+
*/
10+
public class Cluster extends ClusteringFeature{
11+
//集群中的数据点
12+
private ArrayList<double[]> data;
13+
//父亲节点
14+
private LeafNode parentNode;
15+
16+
public Cluster(String[] record){
17+
double[] d = new double[record.length];
18+
data = new ArrayList<>();
19+
for(int i=0; i<record.length; i++){
20+
d[i] = Double.parseDouble(record[i]);
21+
}
22+
data.add(d);
23+
//计算CF聚类特征
24+
this.setLS(data);
25+
this.setSS(data);
26+
this.setN(data);
27+
}
28+
29+
public ArrayList<double[]> getData() {
30+
return data;
31+
}
32+
33+
public void setData(ArrayList<double[]> data) {
34+
this.data = data;
35+
}
36+
37+
@Override
38+
protected void directAddCluster(ClusteringFeature node) {
39+
//如果是聚类包括数据记录,则还需合并数据记录
40+
Cluster c = (Cluster)node;
41+
ArrayList<double[]> dataRecords = c.getData();
42+
this.data.addAll(dataRecords);
43+
44+
super.directAddCluster(node);
45+
}
46+
47+
public LeafNode getParentNode() {
48+
return parentNode;
49+
}
50+
51+
public void setParentNode(LeafNode parentNode) {
52+
this.parentNode = parentNode;
53+
}
54+
55+
@Override
56+
public void addingCluster(ClusteringFeature clusteringFeature) {
57+
// TODO Auto-generated method stub
58+
59+
}
60+
}

0 commit comments

Comments
 (0)