forked from txs72/JavaTutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebCrawler.java
More file actions
executable file
·67 lines (65 loc) · 2.45 KB
/
WebCrawler.java
File metadata and controls
executable file
·67 lines (65 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
/* */ package ch12_io;
/* */ import java.net.URL;
/* */ import java.util.ArrayList;
/* */ import java.util.Scanner;
/* */
/**
 * Small console web crawler.
 *
 * <p>Reads a starting URL from standard input, then visits pages breadth-first:
 * each visited page is printed and scanned line by line for {@code http:} /
 * {@code https:} links, which are queued for later visits.
 */
public class WebCrawler {
  /** The crawl loop keeps running while the visited count is at most this value. */
  private static final int TRAVERSAL_LIMIT = 100;

  /**
   * Prompts for a URL on standard input and crawls from it.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // Deliberately not closed: closing this Scanner would also close System.in.
    Scanner input = new Scanner(System.in);
    System.out.print("Enter a URL: ");
    String url = input.nextLine();
    crawler(url);
  }

  /**
   * Crawls breadth-first from {@code startingURL}, printing {@code "Craw <url>"}
   * for every page visited. A URL is visited at most once; the crawl ends when
   * no URLs are pending or the traversal limit is exceeded.
   *
   * @param startingURL URL of the first page to visit
   */
  public static void crawler(String startingURL) {
    ArrayList<String> pending = new ArrayList<>();
    ArrayList<String> traversed = new ArrayList<>();

    pending.add(startingURL);
    // NOTE(review): the inclusive comparison lets one more page through after
    // the limit is reached (101 in total); kept unchanged to preserve output.
    while (!pending.isEmpty() && traversed.size() <= TRAVERSAL_LIMIT) {
      String urlString = pending.remove(0);
      if (traversed.contains(urlString)) {
        continue; // already visited via another link
      }
      traversed.add(urlString);
      System.out.println("Craw " + urlString);

      for (String link : getSubURLs(urlString)) {
        if (!traversed.contains(link)) {
          pending.add(link);
        }
      }
    }
  }

  /**
   * Downloads the page at {@code urlString} and returns every link found in it,
   * in order of appearance (duplicates included).
   *
   * <p>A link starts at {@code http:} or {@code https:} and ends just before the
   * first quote, angle bracket or whitespace character on the same line, so the
   * delimiter that closes an {@code href} attribute is never part of the result.
   * A link with no such terminator before the end of its line is ignored.
   *
   * <p>This method is best-effort: any failure (malformed URL, I/O error, ...)
   * is reported on standard output as {@code "Error: <message>"} and the links
   * collected so far are returned.
   *
   * @param urlString address of the page to scan
   * @return the links found; empty when the page has none or could not be read
   */
  public static ArrayList<String> getSubURLs(String urlString) {
    ArrayList<String> links = new ArrayList<>();

    try {
      URL url = new URL(urlString);
      // try-with-resources closes the Scanner and the underlying network stream.
      try (Scanner page = new Scanner(url.openStream())) {
        while (page.hasNextLine()) {
          collectLinks(page.nextLine(), links);
        }
      }
    } catch (Exception ex) {
      // Broad on purpose: one unreadable page must not abort the whole crawl.
      System.out.println("Error: " + ex.getMessage());
    }

    return links;
  }

  /** Appends every terminated link found in {@code line} to {@code sink}. */
  private static void collectLinks(String line, ArrayList<String> sink) {
    int start = nextLinkStart(line, 0);
    while (start >= 0) { // index 0 is a valid match position
      int end = linkEnd(line, start);
      if (end < 0) {
        return; // unterminated link: skip it and the rest of the line
      }
      sink.add(line.substring(start, end));
      start = nextLinkStart(line, end);
    }
  }

  /** Returns the index of the next "http:" or "https:" at or after {@code from}, or -1. */
  private static int nextLinkStart(String line, int from) {
    int at = line.indexOf("http", from);
    while (at >= 0) {
      if (line.startsWith(":", at + 4) || line.startsWith("s:", at + 4)) {
        return at;
      }
      at = line.indexOf("http", at + 1);
    }
    return -1;
  }

  /** Returns the index of the first character that terminates the link at {@code start}, or -1. */
  private static int linkEnd(String line, int start) {
    for (int i = start; i < line.length(); i++) {
      char c = line.charAt(i);
      if (c == '"' || c == '\'' || c == '<' || c == '>' || Character.isWhitespace(c)) {
        return i;
      }
    }
    return -1;
  }
}
/* Location: /Volumes/TXS.128G/hope useful/practice/2020.jar!/ch12_io/WebCrawler.class
* Java compiler version: 8 (52.0)
* JD-Core Version: 1.1.3
*/