-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebScraper.java
More file actions
179 lines (140 loc) · 6.97 KB
/
WebScraper.java
File metadata and controls
179 lines (140 loc) · 6.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import java.awt.BorderLayout;
import java.awt.Color;
import java.awt.Font;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import javax.swing.text.BadLocationException;
import javax.swing.text.DefaultHighlighter;
import javax.swing.text.Highlighter;
/**
 * WebScraper demonstrates how to connect to a URL and load a web page, how to
 * search the loaded text for matching substrings, and how to highlight them.
 *
 * <p>The window has three parts: a search field (top), a scrollable text area
 * showing the downloaded page (centre) and a "Load Page" button (bottom).
 * Pressing the button downloads the first entry of {@link #PRODUCT_URLS};
 * pressing Enter in the search field highlights every occurrence of the typed
 * term in the loaded text.
 *
 * <p>NOTE: the download runs on the event dispatch thread, so the window is
 * unresponsive while the request is in flight. Timeouts bound that wait.
 *
 * @author john_carlson
 */
public class WebScraper extends JFrame implements ActionListener {
    private static final long serialVersionUID = 1800;

    /** Browser-like User-Agent sent with the request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2";

    /** Maximum time to wait for the TCP connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time to wait for data once connected, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 15000;

    // plenty of possible pirate products to purchase:
    private static final String[] PRODUCT_URLS = {
        "https://www.walmart.com/ip/PLAYMOBIL-Pirate-Raiders-Ship/54169441", // 2018
        /*
         * Other candidates, currently disabled:
         * "https://www.walmart.com/ip/Pirates-of-the-Caribbean-Dead-Men-Tell-No-Tales-mdash-Silent-Mary-Ghost-Ship-Playset/55416926",
         * "https://www.walmart.com/ip/FISHER-PRICE-Pirate-Shark-Bite-Ship/49851356",
         * "http://www.toysrus.com/product/index.jsp?productId=12113411",
         * "http://www.walmart.com/ip/45064558",
         * "http://www.barnesandnoble.com/w/learn-like-a-pirate-paul-solarz/1121505455?ean=9780988217669",
         * "http://www.amazon.com/LEGO-Pirate-Figure-Pistol-Parrot/dp/B00270WI4C/ref=sr_1_2?ie=UTF8&qid=1449152588&sr=8-2&keywords=pistol+the+pirate+parrots",
         * "http://www.amazon.com/Sun-Star-8633%60%60-Pistol-Pirate/dp/B001HTOCFM/ref=pd_sim_t_4",
         * "http://www.amazon.com/Pirate-Treasure-Coins-Doubloon-Replicas/dp/B001CICTZS",
         * "http://www.amazon.com/Pirates-Booty-White-Cheddar-1-oz/dp/B000JIN1H2/ref=sr_1_1?ie=UTF8&qid=1417705373&sr=8-1&keywords=pirate+booty",
         * "http://www.amazon.com/Ellie-Shoes-Adult-Buccaneer-Boots/dp/B000SPJU4Y/ref=sr_1_5?s=apparel&ie=UTF8&qid=1417705949&sr=1-5&keywords=pirate+boots"
         */
    };

    private JButton button;
    private JTextField searchField;
    private JTextArea textArea;

    /** Address of the page fetched by the "Load Page" button. */
    private String pageUrl = PRODUCT_URLS[0];

    /**
     * Text of the most recently loaded page exactly as it was placed in the
     * text area, or {@code null} when no page has been loaded successfully.
     */
    private String pageText;

    /**
     * Launches the application window.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Swing components must be created and shown on the event dispatch thread.
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                WebScraper app = new WebScraper();
                app.setTitle("WebScraper");
                app.setSize(800, 600);
                app.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
                app.setVisible(true);
            }
        });
    }

    /**
     * Builds the UI: search field on top, scrollable results in the centre and
     * the "Load Page" button at the bottom.
     */
    public WebScraper() {
        Font displayFont = new Font("Helvetica", Font.PLAIN, 13);

        searchField = new JTextField();
        searchField.setFont(displayFont);
        add(searchField, BorderLayout.NORTH);
        // Fired when the user presses Enter in the search field.
        searchField.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                highlightSearchTerm();
            }
        });

        textArea = new JTextArea();
        textArea.setFont(displayFont);
        textArea.setText("Results will be displayed here.");
        JScrollPane sp = new JScrollPane(textArea);
        add(sp, BorderLayout.CENTER);

        button = new JButton("Load Page");
        button.addActionListener(this);
        add(button, BorderLayout.SOUTH);
    }

    /**
     * Handles the "Load Page" button by downloading {@code pageUrl} into the
     * text area. Events from any other source are ignored.
     *
     * @param e the action event delivered by Swing
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        if (e.getSource() == button) {
            loadPage();
        }
    }

    /**
     * Highlights every occurrence of the search field's text in the loaded
     * page. Existing highlights are always cleared first, so the display only
     * ever reflects the latest search.
     */
    private void highlightSearchTerm() {
        Highlighter highlighter = textArea.getHighlighter();
        // Start from a clean slate so matches of earlier terms do not linger.
        highlighter.removeAllHighlights();

        String term = searchField.getText();
        // pageText is null until a page has loaded; searching then must not throw.
        boolean searchable = pageText != null && term != null && term.length() > 0;
        int offset = searchable ? pageText.indexOf(term) : -1;
        if (offset == -1) {
            System.out.println("Search term not found");
            return;
        }

        System.out.println("Search term found!");
        Highlighter.HighlightPainter painter = new DefaultHighlighter.DefaultHighlightPainter(Color.GREEN);
        int length = term.length();
        while (offset != -1) {
            try {
                highlighter.addHighlight(offset, offset + length, painter);
            } catch (BadLocationException ex) {
                // The document no longer matches pageText (e.g. it was edited).
                // Offsets only grow from here, so stop instead of retrying the
                // same offset forever.
                System.out.println(ex);
                break;
            }
            // Advance by one so overlapping matches are highlighted as well.
            offset = pageText.indexOf(term, offset + 1);
        }
    }

    /**
     * Downloads {@code pageUrl} and shows its non-blank, trimmed lines in the
     * text area. On success {@code pageText} holds the displayed text; on any
     * failure an error message is shown and {@code pageText} stays
     * {@code null}.
     */
    private void loadPage() {
        // Forget the previous page up front so a failed load cannot leave
        // stale text (and stale offsets) behind for the search field.
        pageText = null;
        textArea.getHighlighter().removeAllHighlights();
        textArea.setText("");

        HttpURLConnection con = null;
        try {
            // 1. open the url
            URL url = new URL(pageUrl);
            con = (HttpURLConnection) url.openConnection();
            con.setRequestProperty("User-Agent", USER_AGENT);
            con.setRequestMethod("GET");
            // Bound the wait: this runs on the UI thread.
            con.setConnectTimeout(CONNECT_TIMEOUT_MS);
            con.setReadTimeout(READ_TIMEOUT_MS);

            int response = con.getResponseCode();
            textArea.append("Connecting to URL " + url.toExternalForm() + "\n");
            if (response != HttpURLConnection.HTTP_OK) {
                textArea.setText("Error loading the URL! Response Code: " + response);
                return;
            }
            textArea.append("RESPONSE CODE: HTTP_OK\n");
            textArea.append("Contents:");

            // 2. read the body from the SAME connection, so the request that
            // carried the User-Agent header is the one whose content we show.
            // UTF-8 is assumed; the response's declared charset is not inspected.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
                String line = reader.readLine();
                while (line != null) {
                    line = line.trim();
                    if (line.length() > 0) {
                        textArea.append(line + "\n");
                    }
                    line = reader.readLine();
                }
            } // 3. reader is closed here, even if reading failed

            // 4. pageText now contains the text of the webpage as displayed,
            // captured before the status trailer so offsets match the document.
            pageText = textArea.getText();
            textArea.append("\n\nPage loaded successfully.\n");
        } catch (MalformedURLException ex) {
            textArea.setText("Error loading the URL (URL Format incorrect)");
            ex.printStackTrace();
        } catch (IOException ex) {
            // Report network/read failures instead of leaving a blank window.
            textArea.setText("Error loading the URL: " + ex);
            ex.printStackTrace();
        } finally {
            if (con != null) {
                con.disconnect();
            }
        }
    } // end loadPage()
} // end WebScraper class