-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathMainDataAnalysis.java
More file actions
395 lines (339 loc) · 15.2 KB
/
MainDataAnalysis.java
File metadata and controls
395 lines (339 loc) · 15.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
import net.sf.json.JSONObject;
import net.sf.json.JSONArray;
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
public class MainDataAnalysis {
/**
 * Main entry point: sequential (single-threaded) implementation of the computing core.
 *
 * <p>Reads the CSV log named by {@code FilePath.logFile} line by line, splits every line into
 * cells and, for each record whose cell 1 equals the target date, passes the IP (cell 5) to
 * {@code IpAreaRange.search} and the MAC address (cell 4) to
 * {@code MacAddressStatistics.insert}. Lines that are too short to contain those columns are
 * counted in the total but otherwise skipped.
 *
 * @return a JSON object with the keys {@code "mac"}, {@code "prov"}, {@code "Matched"},
 *         {@code "Summary"} and {@code "RunningTime:"}, or {@code null} if an
 *         {@link IOException} occurred (the exception is printed to standard output)
 */
public static JSONObject runBySequentialComputing() {
    try {
        // Timer utility: started before any other work so set-up cost is included.
        ProgramTimer pt = new ProgramTimer();
        pt.start();
        // IP -> region lookup utility.
        IpAreaRange ipar = new IpAreaRange();
        // MAC address statistics utility.
        MacAddressStatistics mast = new MacAddressStatistics();
        // Alternative log format (NginX access log, txt), kept for reference:
        //pattern = "([^ ]*) ([^ ]*) ([^ ]*) (\\[.*\\]) (\\\".*?\\\") (-|[0-9]*) (-|[0-9]*) (\\\".*?\\\") (\\\".*?\\\") \\\"([^\\\"]*)\\\"";
        // Log format used here (contest log, csv): group(2) is one cell, optionally quoted.
        Pattern r = Pattern.compile("(,)?((\\\"[^\\\"]*(\\\"{2})*[^\\\"]*\\\")*[^,]*)");
        // Strips the surrounding double quotes of a cell. Compiled once instead of per cell.
        Pattern quoted = Pattern.compile("\"((.)*)\"");
        // Number of records that matched the filter / number of lines read.
        int dataCount = 0;
        int sumDataCount = 0;
        // Filter condition: only records of this date are analysed.
        String needDate = "2018-07-17";
        File file = new File(FilePath.logFile);
        // try-with-resources closes the reader and the underlying file stream on every exit
        // path; the reader keeps the original 5 MiB buffer.
        try (InputStream in = new BufferedInputStream(new FileInputStream(file));
             BufferedReader reader =
                     new BufferedReader(new InputStreamReader(in, "utf-8"), 5 * 1024 * 1024)) {
            String line;
            while ((line = reader.readLine()) != null) {
                sumDataCount++;
                // Split the line into its CSV cells.
                List<String> cells = new ArrayList<String>();
                Matcher matcher = r.matcher(line);
                while (matcher.find()) {
                    String cell = matcher.group(2);
                    Matcher quotedMatcher = quoted.matcher(cell);
                    if (quotedMatcher.find()) {
                        // Keep only the text between the first and the last double quote.
                        cell = quotedMatcher.group(1);
                    }
                    cells.add(cell);
                }
                // Blank or truncated lines do not have the date/MAC/IP columns; skip them
                // instead of failing with IndexOutOfBoundsException.
                if (cells.size() < 6) {
                    continue;
                }
                if (!needDate.equals(cells.get(1))) {
                    continue;
                }
                // A record may carry several comma-separated IPs; the last one is used.
                String ipCell = cells.get(5);
                String ip;
                if (ipCell.contains(",")) {
                    String[] tmp = ipCell.split(", ");
                    ip = tmp[tmp.length - 1];
                } else {
                    ip = ipCell;
                }
                // The returned region is not needed here; presumably search() also updates
                // the tally that getRegionCount() reports below -- confirm in IpAreaRange.
                ipar.search(ip);
                // Records without a MAC address are counted under an all-zero placeholder.
                String macCell = cells.get(4);
                mast.insert(macCell.isEmpty() ? "00-00-00-00-00-00" : macCell);
                dataCount++;
            }
        }
        // Report matched / total record counts.
        System.out.println("Matched/Summary Data Counter: " + dataCount + "/" + sumDataCount);
        // Report the running time.
        System.out.println("串行计算" + pt.runningTime());
        // Convert the results to JSON. Keys are kept verbatim (including the trailing colon
        // in "RunningTime:") because consumers outside this file may depend on them.
        JSONArray arrayOfMac = JSONArray.fromObject(mast.getAnswer());
        JSONArray arrayOfIp = JSONArray.fromObject(ipar.getRegionCount());
        JSONObject jsonOfAnswer = new JSONObject();
        jsonOfAnswer.put("mac", arrayOfMac);
        jsonOfAnswer.put("prov", arrayOfIp);
        jsonOfAnswer.put("Matched", dataCount);
        jsonOfAnswer.put("Summary", sumDataCount);
        jsonOfAnswer.put("RunningTime:", pt.runningTime());
        return jsonOfAnswer;
    } catch (IOException e) {
        System.out.println(e);
    }
    return null;
}
/**
 * Main entry point: parallel, thread-safe implementation of the computing core.
 *
 * <p>Streams the lines of the CSV log named by {@code FilePath.logFile} through a parallel
 * stream. Each line is split into cells without holding the lock; the shared state (both
 * counters, {@code IpAreaRange.search} and {@code MacAddressStatistics.insert}) is only
 * touched while the lock is held. Lines that are too short to contain the date/MAC/IP
 * columns are counted in the total but otherwise skipped.
 *
 * @return a JSON object with the keys {@code "mac"}, {@code "prov"}, {@code "Matched"},
 *         {@code "Summary"} and {@code "RunningTime:"}, or {@code null} if an I/O error
 *         occurred (the exception is printed to standard output)
 */
public static JSONObject runByParallelComputing() {
    try {
        // Timer utility: started before any other work so set-up cost is included.
        ProgramTimer pt = new ProgramTimer();
        pt.start();
        // IP -> region lookup utility.
        IpAreaRange ipar = new IpAreaRange();
        // MAC address statistics utility.
        MacAddressStatistics mast = new MacAddressStatistics();
        // Alternative log format (NginX access log, txt), kept for reference:
        //pattern = "([^ ]*) ([^ ]*) ([^ ]*) (\\[.*\\]) (\\\".*?\\\") (-|[0-9]*) (-|[0-9]*) (\\\".*?\\\") (\\\".*?\\\") \\\"([^\\\"]*)\\\"";
        // Log format used here (contest log, csv): group(2) is one cell, optionally quoted.
        Pattern r = Pattern.compile("(,)?((\\\"[^\\\"]*(\\\"{2})*[^\\\"]*\\\")*[^,]*)");
        // Strips the surrounding double quotes of a cell. Compiled once; Pattern instances
        // are safe to share between threads, each thread creates its own Matcher.
        Pattern quoted = Pattern.compile("\"((.)*)\"");
        // Counters live in one-element arrays so the lambda can mutate them.
        // [0] = total number of lines read.
        int[] testSumDataCount = {0};
        // [0] = number of records that matched the filter.
        int[] testDataCount = {0};
        // Filter condition: only records of this date are analysed.
        String needDate = "2018-07-17";
        // Guards every access to the counters and to ipar / mast.
        Lock lock = new ReentrantLock();
        File file = new File(FilePath.logFile);
        // try-with-resources closes the reader and the underlying file stream on every exit
        // path; the reader keeps the original 5 MiB buffer.
        try (InputStream in = new BufferedInputStream(new FileInputStream(file));
             BufferedReader reader =
                     new BufferedReader(new InputStreamReader(in, "utf-8"), 5 * 1024 * 1024)) {
            // The parallel stream distributes the lines over worker threads.
            reader.lines().parallel().forEach((String line) -> {
                // Split the line into its CSV cells. The list is confined to the current
                // thread, so a plain ArrayList is sufficient.
                List<String> cells = new ArrayList<String>();
                Matcher matcher = r.matcher(line);
                while (matcher.find()) {
                    String cell = matcher.group(2);
                    Matcher quotedMatcher = quoted.matcher(cell);
                    if (quotedMatcher.find()) {
                        // Keep only the text between the first and the last double quote.
                        cell = quotedMatcher.group(1);
                    }
                    cells.add(cell);
                }
                // Blank or truncated lines never match; this also prevents
                // IndexOutOfBoundsException on the column lookups below.
                boolean matched = cells.size() >= 6 && needDate.equals(cells.get(1));
                String infoOfIp = null;
                String infoOfMac = null;
                if (matched) {
                    // A record may carry several comma-separated IPs; the last one is used.
                    String ipCell = cells.get(5);
                    if (ipCell.contains(",")) {
                        String[] tmp = ipCell.split(", ");
                        infoOfIp = tmp[tmp.length - 1];
                    } else {
                        infoOfIp = ipCell;
                    }
                    // Records without a MAC address use an all-zero placeholder.
                    String macCell = cells.get(4);
                    infoOfMac = macCell.isEmpty() ? "00-00-00-00-00-00" : macCell;
                }
                // Single critical section per line: update the counters and record the
                // extracted data while holding the lock.
                lock.lock();
                try {
                    testSumDataCount[0]++;
                    if (matched) {
                        ipar.search(infoOfIp);
                        mast.insert(infoOfMac);
                        testDataCount[0]++;
                    }
                } finally {
                    lock.unlock();
                }
            });
        }
        // Report matched / total record counts.
        System.out.println("Matched/Summary Data Counter: " + testDataCount[0] + "/" + testSumDataCount[0]);
        // Report the running time.
        System.out.println("并行计算" + pt.runningTime());
        // Convert the results to JSON. Keys are kept verbatim (including the trailing colon
        // in "RunningTime:") because consumers outside this file may depend on them.
        JSONArray arrayOfMac = JSONArray.fromObject(mast.getAnswer());
        JSONArray arrayOfIp = JSONArray.fromObject(ipar.getRegionCount());
        JSONObject jsonOfAnswer = new JSONObject();
        jsonOfAnswer.put("mac", arrayOfMac);
        jsonOfAnswer.put("prov", arrayOfIp);
        jsonOfAnswer.put("Matched", testDataCount[0]);
        jsonOfAnswer.put("Summary", testSumDataCount[0]);
        jsonOfAnswer.put("RunningTime:", pt.runningTime());
        return jsonOfAnswer;
    } catch (IOException | UncheckedIOException e) {
        // BufferedReader.lines() reports read failures as UncheckedIOException; treat them
        // exactly like the checked IOException raised while opening the file.
        System.out.println(e);
    }
    return null;
}
/**
 * Main entry point: parallel implementation of the computing core that is deliberately
 * NOT thread-safe.
 *
 * <p>Works like the locked parallel variant, except that {@code IpAreaRange.search} and
 * {@code MacAddressStatistics.insert} are called from the worker threads without holding
 * the lock; only the two counters are protected. Whether the "mac" and "prov" results are
 * reliable therefore depends on whether those classes synchronize internally, which is not
 * visible from this file. Lines that are too short to contain the date/MAC/IP columns are
 * counted in the total but otherwise skipped.
 *
 * @return a JSON object with the keys {@code "mac"}, {@code "prov"}, {@code "Matched"},
 *         {@code "Summary"} and {@code "RunningTime:"}, or {@code null} if an I/O error
 *         occurred (the exception is printed to standard output)
 */
public static JSONObject runByParallelComputingNotSafe() {
    try {
        // Timer utility: started before any other work so set-up cost is included.
        ProgramTimer pt = new ProgramTimer();
        pt.start();
        // IP -> region lookup utility.
        IpAreaRange ipar = new IpAreaRange();
        // MAC address statistics utility.
        MacAddressStatistics mast = new MacAddressStatistics();
        // Alternative log format (NginX access log, txt), kept for reference:
        //pattern = "([^ ]*) ([^ ]*) ([^ ]*) (\\[.*\\]) (\\\".*?\\\") (-|[0-9]*) (-|[0-9]*) (\\\".*?\\\") (\\\".*?\\\") \\\"([^\\\"]*)\\\"";
        // Log format used here (contest log, csv): group(2) is one cell, optionally quoted.
        Pattern r = Pattern.compile("(,)?((\\\"[^\\\"]*(\\\"{2})*[^\\\"]*\\\")*[^,]*)");
        // Strips the surrounding double quotes of a cell. Compiled once; Pattern instances
        // are safe to share between threads, each thread creates its own Matcher.
        Pattern quoted = Pattern.compile("\"((.)*)\"");
        // Counters live in one-element arrays so the lambda can mutate them.
        // [0] = total number of lines read.
        int[] testSumDataCount = {0};
        // [0] = number of records that matched the filter.
        int[] testDataCount = {0};
        // Filter condition: only records of this date are analysed.
        String needDate = "2018-07-17";
        // Guards the two counters only (see the method documentation).
        Lock lock = new ReentrantLock();
        File file = new File(FilePath.logFile);
        // try-with-resources closes the reader and the underlying file stream on every exit
        // path; the reader keeps the original 5 MiB buffer.
        try (InputStream in = new BufferedInputStream(new FileInputStream(file));
             BufferedReader reader =
                     new BufferedReader(new InputStreamReader(in, "utf-8"), 5 * 1024 * 1024)) {
            // The parallel stream distributes the lines over worker threads.
            reader.lines().parallel().forEach((String line) -> {
                // Count the line; the counter itself is updated under the lock.
                lock.lock();
                try {
                    testSumDataCount[0]++;
                } finally {
                    lock.unlock();
                }
                // Split the line into its CSV cells. The list is confined to the current
                // thread, so a plain ArrayList is sufficient.
                List<String> cells = new ArrayList<String>();
                Matcher matcher = r.matcher(line);
                while (matcher.find()) {
                    String cell = matcher.group(2);
                    Matcher quotedMatcher = quoted.matcher(cell);
                    if (quotedMatcher.find()) {
                        // Keep only the text between the first and the last double quote.
                        cell = quotedMatcher.group(1);
                    }
                    cells.add(cell);
                }
                // Blank or truncated lines never match; this also prevents
                // IndexOutOfBoundsException on the column lookups below.
                if (cells.size() < 6 || !needDate.equals(cells.get(1))) {
                    return;
                }
                // A record may carry several comma-separated IPs; the last one is used.
                String ipCell = cells.get(5);
                String infoOfIp;
                if (ipCell.contains(",")) {
                    String[] tmp = ipCell.split(", ");
                    infoOfIp = tmp[tmp.length - 1];
                } else {
                    infoOfIp = ipCell;
                }
                // Records without a MAC address use an all-zero placeholder.
                String macCell = cells.get(4);
                String infoOfMac = macCell.isEmpty() ? "00-00-00-00-00-00" : macCell;
                // Intentionally outside the lock: this is what makes the variant "not safe".
                ipar.search(infoOfIp);
                mast.insert(infoOfMac);
                lock.lock();
                try {
                    testDataCount[0]++;
                } finally {
                    lock.unlock();
                }
            });
        }
        // Report matched / total record counts.
        System.out.println("Matched/Summary Data Counter: " + testDataCount[0] + "/" + testSumDataCount[0]);
        // Report the running time.
        System.out.println("非安全并行计算" + pt.runningTime());
        // Convert the results to JSON. Keys are kept verbatim (including the trailing colon
        // in "RunningTime:") because consumers outside this file may depend on them.
        JSONArray arrayOfMac = JSONArray.fromObject(mast.getAnswer());
        JSONArray arrayOfIp = JSONArray.fromObject(ipar.getRegionCount());
        JSONObject jsonOfAnswer = new JSONObject();
        jsonOfAnswer.put("mac", arrayOfMac);
        jsonOfAnswer.put("prov", arrayOfIp);
        jsonOfAnswer.put("Matched", testDataCount[0]);
        jsonOfAnswer.put("Summary", testSumDataCount[0]);
        jsonOfAnswer.put("RunningTime:", pt.runningTime());
        return jsonOfAnswer;
    } catch (IOException | UncheckedIOException e) {
        // BufferedReader.lines() reports read failures as UncheckedIOException; treat them
        // exactly like the checked IOException raised while opening the file.
        System.out.println(e);
    }
    return null;
}
/**
 * Entry point for querying system performance information.
 * (Per the original author's note: only Windows and Linux are supported, not macOS.)
 *
 * <p>CPU, memory and disk usage are all sampled through {@code SystemInfo}, but only the CPU
 * figure is placed in the result.
 *
 * @return a JSON object with the single key {@code "CPU"}, whose value is the CPU usage
 *         rounded to a whole number and rendered as a string
 * @throws Exception declared by this method; the visible code does not show which call
 *         raises it
 */
public static JSONObject getSystemInfo() throws Exception {
    // Sample all three metrics in the original order, even though memory and disk
    // usage are not reported yet.
    final double cpuLoad = SystemInfo.getCpuUsage();
    final double memLoad = SystemInfo.getMemUsage();
    final double diskLoad = SystemInfo.getDiskUsage();

    // Round to a whole number via DecimalFormat, then normalise through int so the
    // emitted string is a plain integer.
    final java.text.DecimalFormat wholeNumber = new java.text.DecimalFormat("0");
    final String roundedText = wholeNumber.format(cpuLoad);
    final int cpuRounded = Integer.parseInt(roundedText);

    final JSONObject jsonOfAnswer = new JSONObject();
    jsonOfAnswer.put("CPU", Integer.toString(cpuRounded));
    return jsonOfAnswer;
}
/**
 * Command-line driver: runs one of the analysis variants and prints its JSON result.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Other variants that can be swapped in:
    //   MainDataAnalysis.runBySequentialComputing()
    //   MainDataAnalysis.runByParallelComputing()
    final JSONObject result = MainDataAnalysis.runByParallelComputingNotSafe();
    System.out.println(result);
}
}