-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathEthCrawler.java
More file actions
115 lines (101 loc) · 4.14 KB
/
EthCrawler.java
File metadata and controls
115 lines (101 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Searches the configured websites for links that were not seen on the previous run and
 * sends every new link through a Telegram bot.
 *
 * <p>For each site the links of the last run are read from a {@code Storage} named
 * {@code <site name>.txt}; after the comparison the links of the current page are handed
 * back to that storage.
 *
 * @author Thiemo Zaugg
 */
public class EthCrawler {

    /**
     * Entry point: builds the bot from the configuration below, downloads every configured
     * page and reports new links. A site that cannot be fetched is reported to the error chat
     * and skipped; the remaining sites are still processed.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // chats that receive the "new link" notifications
        ArrayList<String> chatIds = new ArrayList<>();
        // all websites that are crawled
        ArrayList<UrlName> urls = new ArrayList<>();

        ///////////////////////////////////////////////////////////////////////////
        //Required:
        //setup for the telegram bot:
        String token = "";
        //-> add all channel names (@channel_name)
        chatIds.add("");
        //-> add url's
        final String myWebsiteUrl = "";
        urls.add(new UrlName("NameOfMyWebsite", myWebsiteUrl, false)); //last argument can be set if moodle login is required

        //Optional:
        //Telegram channel name (@channel_name) for error messages
        String errorChatId = "";
        //if moodle login is used:
        String userNameMoodle = "";
        String passwordMoodle = "";
        ///////////////////////////////////////////////////////////////////////////

        // init Telegram MessageBot (Dobby)
        DobbySpiderBot dobby = new DobbySpiderBot(chatIds, errorChatId, token);

        for (UrlName url : urls) {
            Document doc;
            try {
                if (url.moodle) {
                    // additionally log into moodle
                    doc = LogInBot.logOn(url.url, userNameMoodle, passwordMoodle);
                } else {
                    doc = Jsoup.connect(url.url).get();
                }
            } catch (IOException | IllegalArgumentException e) {
                // Jsoup.connect rejects an empty or malformed URL with an
                // IllegalArgumentException; treat that like a failed connection so one
                // bad entry does not abort the run for all remaining sites.
                e.printStackTrace();
                System.out.println("couldn't connect to: " + url.name + " website");
                dobby.sendErrMsg("couldn't connect to: " + url.name + " website");
                continue;
            }
            if (doc == null) {
                // the moodle login returned no document; skip this site
                // NOTE(review): presumably LogInBot reports its own failure - confirm
                continue;
            }
            analysePage(doc, url, dobby); //searches for new links and sends them to users
        }

        System.out.println("done! - all sites parsed");
        dobby.sendErrMsg("done! - all sites parsed");
    }

    /**
     * Compares the links of {@code doc} with the links stored for {@code url} on the last
     * run, sends one message per new link and then stores the current links.
     *
     * <p>If the storage file does not exist yet, no messages are sent; the file is created
     * and filled with the current links.
     *
     * @param doc   parsed page to scan for {@code a[href]} elements
     * @param url   site description; its name determines the storage file name
     * @param dobby bot used for notifications and error messages
     */
    private static void analysePage(Document doc, UrlName url, DobbySpiderBot dobby) {
        Element content = doc.body(); // select relevant Elements
        Elements links_new = content.select("a[href]"); // get all links
        Storage st = new Storage(url.name + ".txt"); // links from the last run

        // search for new links (compares the stored links with the links on the page)
        try {
            // A set gives constant-time lookups; adding every reported href to it also
            // prevents a second message when the same href occurs again on the page.
            Set<String> knownHrefs = new HashSet<>(st.getLinks());
            for (Element link : links_new) {
                // compare on the raw attribute value, exactly as before, so that
                // previously written storage files keep matching
                String href = link.attr("href");
                if (knownHrefs.add(href)) {
                    //send message to telegram users
                    dobby.sendMsg("Dobby found something! " + link.text() + " you'll find it here: " + linkTarget(link));
                }
            }
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
            //trying to create storage files
            try {
                st.createStorage();
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("couldn't create storage file");
                // report like every other storage failure
                dobby.sendErrMsg("couldn't create storage file");
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("couldn't find Storage files!");
            dobby.sendErrMsg("couldn't find Storage files!");
        }

        //store links in Storage
        try {
            st.storeLinks(links_new);
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
            dobby.sendErrMsg("Storage: FileNotFoundException");
        } catch (IOException e1) {
            e1.printStackTrace();
            dobby.sendErrMsg("Storage: IOException");
        }
        System.out.println(url.name + "--> done");
    }

    /**
     * Returns the address to show to the user for {@code link}: the absolute URL resolved
     * against the document's base URI, or the raw {@code href} value when no absolute URL
     * can be built.
     */
    private static String linkTarget(Element link) {
        String absolute = link.absUrl("href");
        return absolute.isEmpty() ? link.attr("href") : absolute;
    }
}