import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.awt.BorderLayout;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;

import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
public class SimpleWebCrawler extends JFrame {

    JTextField yourInputField = new JTextField(20);
    static JTextArea _resultArea = new JTextArea(100, 100);
    JScrollPane scrollingArea = new JScrollPane(_resultArea);
    private final static String newline = "\n";

    public SimpleWebCrawler() throws MalformedURLException {
        String word2 = yourInputField.getText();
        _resultArea.setEditable(false);

        // Dump the raw page contents into the text area.
        try {
            URL my_url = new URL("http://" + word2 + "/");
            BufferedReader br = new BufferedReader(new InputStreamReader(
                    my_url.openStream()));
            String strTemp = "";
            while (null != (strTemp = br.readLine())) {
                _resultArea.append(strTemp + newline);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }

        _resultArea.append("\n");
        _resultArea.append("\n");
        _resultArea.append("\n");

        // Extract the links with jsoup and write them to a file.
        String url = "http://" + word2 + "/";
        print("Fetching %s...", url);
        try {
            Document doc = Jsoup.connect(url).get();
            Elements links = doc.select("a[href]");
            System.out.println("\n");
            BufferedWriter bw = new BufferedWriter(new FileWriter(
                    "C:\\Users\\user\\fypworkspace\\FYP\\Link\\abc.txt"));
            _resultArea.append("\n");
            for (Element link : links) {
                print(" %s ", link.attr("abs:href"), trim(link.text(), 35));
                bw.write(link.attr("abs:href"));
                bw.write(System.getProperty("line.separator"));
            }
            bw.flush();
            bw.close();
        } catch (IOException e1) {
        }

        // Get the content pane, set layout, add to center.
        JPanel content = new JPanel();
        content.setLayout(new BorderLayout());
        content.add(scrollingArea, BorderLayout.CENTER);
        content.add(yourInputField);
        this.setContentPane(content);
        this.setTitle("Crawled Links");
        this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        this.pack();
    }

    private static void print(String msg, Object... args) {
        _resultArea.append(String.format(msg, args) + newline);
    }

    private static String trim(String s, int width) {
        if (s.length() > width)
            return s.substring(0, width - 1) + ".";
        else
            return s;
    }

    public static void main(String[] args) throws IOException {
        JFrame win = new SimpleWebCrawler();
        win.setVisible(true);
    }
}
Hi, this is my code to extract links from a web address. The user keys in the desired URL and the code extracts the links from that URL.
At the moment the code prompts the user to key in the URL in the Eclipse IDE console. After the input is keyed in, the code extracts the links from the URL and sends the output to a JTextArea.
What I want to do now is create a JTextField to receive the user input, rather than having the user key the input in at the console.
The lines of code responsible for handling the string input are:
URL my_url = new URL("http://" + word2 + "/");
String url = "http://" + word2 + "/";
However, I am getting an IllegalArgumentException that says:
protocol = http host = null
What am I missing?
The problem here is that you are trying to formulate the URL too early, even before your GUI is created, so word2
is just an empty string and the URL looks like "http:///", which is not valid. Try adding a JButton
and only extracting the links from the web page when the button is pressed, as in the sketch below.
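For example, a minimal sketch of that idea inside the constructor, reusing the names from the code above. Here crawl() is a hypothetical method standing in for the fetch-and-parse logic that currently runs in the constructor, and you will also need imports for JButton, ActionEvent, and ActionListener:

JButton crawlButton = new JButton("Crawl");
crawlButton.addActionListener(new ActionListener() {
    public void actionPerformed(ActionEvent e) {
        // By the time the button is clicked, the field holds real input.
        crawl(yourInputField.getText()); // hypothetical crawl(String host)
    }
});
// Give the text field an explicit position so it does not collide
// with the scroll pane in the BorderLayout.
content.add(yourInputField, BorderLayout.NORTH);
content.add(crawlButton, BorderLayout.SOUTH);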
When the object is constructed, you're creating a new, fresh JTextField. However, that JTextField isn't shown to the user before you read it, so the user has had no chance to enter data into it. Because no data has been entered, the newly created JTextField returns an empty string when you call getText().
The best thing to do would be either to display the JTextField on a form and only run the HTTP crawl after someone clicks "Start", or better still to change the constructor to accept a String argument that is the base host for the URL you want to download. Then SimpleWebCrawler does all the work, and you can build another class that displays the form.
e.g.
public SimpleWebCrawler(String word2) throws MalformedURLException {
    // We don't need this any more
    //String word2 = yourInputField.getText();
    _resultArea.setEditable(false);
    try {
        URL my_url = new URL("http://" + word2 + "/");
        BufferedReader br = new BufferedReader(new InputStreamReader(
                my_url.openStream()));
        String strTemp = "";
        while (null != (strTemp = br.readLine())) {
            _resultArea.append(strTemp + newline);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    // Rest of your constructor goes here...
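And a minimal sketch of the separate form class described above. The class name CrawlerForm, the labels, and the layout are illustrative, not part of the original code:

import java.awt.BorderLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JTextField;

public class CrawlerForm {
    public static void main(String[] args) {
        final JTextField hostField = new JTextField(20);
        JButton startButton = new JButton("Start");

        startButton.addActionListener(new ActionListener() {
            public void actionPerformed(ActionEvent e) {
                try {
                    // The field is only read after the user has typed and clicked.
                    JFrame win = new SimpleWebCrawler(hostField.getText());
                    win.setVisible(true);
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
            }
        });

        JPanel panel = new JPanel(new BorderLayout());
        panel.add(hostField, BorderLayout.CENTER);
        panel.add(startButton, BorderLayout.EAST);

        JFrame form = new JFrame("Enter host");
        form.setContentPane(panel);
        form.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        form.pack();
        form.setVisible(true);
    }
}

This way the crawler never touches the text field at all; it just receives the host string once the user has actually provided it.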