I am having some problems with writing a CSV file. The program I made uses HTML Parser to get data from a website, and then saves that data to a .csv file.
The problem I'm having is that it doesn't write all the data to the file (instead it truncates some of it), and when there are multiple URLs, it doesn't write them at all!
To use the program, first type in a directory, then put these URLs in the top area:
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=04541GEL2
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31359T8L5
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31395RGT9
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=57643LJU1
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31358RRC9
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31392V6H0
Press "send info" and then press "start". It should log everything in the bottom box.
Here is the code to the program:
package com.js.extract;
import java.io.*;
import java.util.*;
import javax.swing.*;
import java.awt.*;
import java.awt.event.*;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
/**
 * Swing window that collects URLs, extracts the text of each page with
 * HTML Parser's {@code StringBean}, and writes the labelled fields of every
 * page to its own {@code CUSIP<i>.csv} file.
 *
 * <p>Usage: type the URLs into the upper text area, press "send info" to
 * queue them, then press "start" to download and save them. Progress is
 * appended to the lower (read-only) text area.
 */
public class Extraction_GUI extends JFrame {

    private static final long serialVersionUID = 1L;

    /** Starts the harvest of all queued URLs. */
    protected JButton start;
    /** Moves the URLs typed into {@link #infoArea} into {@link #urls}. */
    protected JButton sendInfo;
    /** Editable area the user types the URLs into. */
    protected JTextArea infoArea;
    /** Read-only area that progress messages are appended to. */
    protected static JTextArea log;
    protected JScrollPane sp;
    protected JScrollPane sp2;
    protected JSplitPane pane;
    protected JPanel mainPanel;
    protected JPanel aPanel;
    protected JPanel lPanel;
    protected int areaStatus = 0;
    /** Calendar created once, when the class is loaded; used for the logged times. */
    static protected Calendar cal = Calendar.getInstance();
    /** URLs queued by the "send info" button, in the order they were entered. */
    protected static ArrayList<String> urls = new ArrayList<String>();

    /**
     * Builds and shows the window, asks the user for the save location,
     * and wires up both buttons.
     */
    public Extraction_GUI() {
        super("Extraction by Jeel Shah");
        setSize(660, 520);
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        setLocationRelativeTo(null);
        setResizable(false);

        mainPanel = new JPanel();
        mainPanel.setLayout(new FlowLayout());
        aPanel = new JPanel();
        aPanel.setLayout(new FlowLayout());
        lPanel = new JPanel();
        lPanel.setLayout(new FlowLayout());

        start = new JButton("start");
        sendInfo = new JButton("send info");
        infoArea = new JTextArea("http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=3133F0GM5",
                13, 55);
        log = new JTextArea(10, 55);
        log.setEditable(false);

        sp = new JScrollPane(infoArea);
        sp.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_ALWAYS);
        sp.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);
        sp2 = new JScrollPane(log);
        sp2.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_ALWAYS);
        sp2.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);

        aPanel.add(sp);
        lPanel.add(sp2);
        pane = new JSplitPane(JSplitPane.VERTICAL_SPLIT, aPanel, lPanel);
        mainPanel.add(pane);
        mainPanel.add(start);
        mainPanel.add(sendInfo);
        add(mainPanel);
        setVisible(true);

        // The answer is used verbatim as the prefix of every output file name.
        final String toSave = JOptionPane.showInputDialog(null, "Please enter where you would like to save your files: ");
        log.append("Data will be saved to: " + toSave + "\n");

        sendInfo.addActionListener(new infoListener());
        start.addActionListener(new ActionListener() {
            public void actionPerformed(ActionEvent e) {
                try {
                    start(toSave);
                } catch (ParserException e1) {
                    e1.printStackTrace();
                } catch (IOException e2) {
                    e2.printStackTrace();
                }
            }
        });
    }

    /**
     * Downloads every queued URL and writes its fields to
     * {@code file + "CUSIP" + i + ".csv"}, where {@code i} is the URL's index.
     *
     * <p>The file name is built by plain string concatenation, so {@code file}
     * must already end with a path separator for the output to land inside
     * that directory.
     *
     * @param file prefix (normally a directory path) of the output files
     * @throws ParserException if HTML Parser fails to extract the page text
     * @throws IOException if an output file cannot be written
     */
    public static void start(String file) throws ParserException, IOException {
        for (int i = 0; i < urls.size(); i++) {
            BufferedWriter writer = new BufferedWriter(new FileWriter(file + "CUSIP" + i + ".csv"));
            StringBean sb = new StringBean();
            sb.setLinks(false);
            sb.setReplaceNonBreakingSpaces(true);
            sb.setCollapse(true);
            sb.setURL(urls.get(i));
            String toReduce = sb.getStrings();
            StringBuffer buffer = new StringBuffer(toReduce);
            if (toReduce.contains("*CUSIP Detail information will be available when this issue settles.")) {
                log.append("CUSIP Detail not available.For " + urls.get(i) + "\n");
            } else {
                // Keep only the span from just before "Cusip/ISIN" up to "Underwriters:".
                buffer.delete(0, toReduce.indexOf("Cusip/ISIN") - 1);
                buffer.delete(buffer.indexOf("Underwriters:"), buffer.length());
                // Each entry runs from its own label up to the next label.
                String[] data = new String[13];
                data[0] = buffer.substring(0, buffer.indexOf("Instrument Type:"));
                data[1] = buffer.substring(buffer.indexOf("Instrument Type:"), buffer.indexOf("Call Type:"));
                data[2] = buffer.substring(buffer.indexOf("Call Type:"), buffer.indexOf("Issue Date:"));
                data[3] = buffer.substring(buffer.indexOf("Issue Date:"), buffer.indexOf("Issue Price:"));
                data[4] = buffer.substring(buffer.indexOf("Issue Price:"), buffer.indexOf("Amount Issued:"));
                data[5] = buffer.substring(buffer.indexOf("Amount Issued:"), buffer.indexOf("Lockout Period:"));
                data[6] = buffer.substring(buffer.indexOf("Lockout Period:"), buffer.indexOf("Currency"));
                data[7] = buffer.substring(buffer.indexOf("Currency"), buffer.indexOf("Denomination:"));
                data[8] = buffer.substring(buffer.indexOf("Denomination:"), buffer.indexOf("First Payment"));
                data[9] = buffer.substring(buffer.indexOf("First Payment"), buffer.indexOf("Maturity Date"));
                data[10] = buffer.substring(buffer.indexOf("Maturity Date"), buffer.indexOf("Original Coupon:"));
                data[11] = buffer.substring(buffer.indexOf("Original Coupon:"), buffer.indexOf("Current Coupon:"));
                data[12] = buffer.substring(buffer.indexOf("Current Coupon:"), buffer.length());
                // One field per line, each followed by a comma.
                for (String s : data) {
                    writer.write(s);
                    writer.write(",");
                    writer.newLine();
                }
            }
            writer.flush();
            writer.close();
            log.append("Harvested: " + urls.get(i) + " successfully" + " \n");
        }
        log.append("Completed at: " + cal.get(Calendar.HOUR) + ":" + cal.get(Calendar.MINUTE) + ":" + cal.get(Calendar.SECOND) + "\n");
    }

    /**
     * Handles "send info": splits the text of {@link #infoArea} on commas and
     * appends every piece to {@link #urls}.
     */
    class infoListener implements ActionListener {
        public void actionPerformed(ActionEvent arg0) {
            String url = infoArea.getText();
            StringTokenizer st = new StringTokenizer(url, ",");
            while (st.hasMoreTokens()) {
                urls.add(st.nextToken());
            }
            log.append("Data Recieved at: " + cal.get(Calendar.HOUR) + ":" + cal.get(Calendar.MINUTE) + ":" + cal.get(Calendar.SECOND) + "\n");
        }
    }

    /** Creates the window on the Swing event dispatch thread. */
    public static void main(String[] args) {
        javax.swing.SwingUtilities.invokeLater(new Runnable() {
            public void run() {
                new Extraction_GUI();
            }
        });
    }
}
NOTE: You will need to download HTML Parser and add it to your build path.
First:
You should add "/" before "CUSIP" in the following line of code:
BufferedWriter writer = new BufferedWriter(new FileWriter(file + "CUSIP" + i + ".csv"));
It should be
BufferedWriter writer = new BufferedWriter(new FileWriter(file + "/CUSIP" + i + ".csv"));
Otherwise, your program can't find the output file.
Second:
You should pass the URLs separated by ",". Otherwise your program cannot correctly parse the input.
So the input should be:
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=04541GEL2,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31359T8L5,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31395RGT9,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=57643LJU1,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31358RRC9,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31392V6H0
Third:
The pages at the URLs that you specified as input contain only the following text:
CUSIP: 31392V6H0*
*CUSIP Detail information will be available when this issue settles.
And in this case your program should not write anything to file.
See the following part of your program:
// Marker text found: only log that the details are unavailable; no fields are extracted.
if (toReduce.contains("*CUSIP Detail information will be available when this issue settles.")) {
log.append("CUSIP Detail not available.For " + urls.get(i) + "\n");
} else {
// Drop everything in front of the "Cusip/ISIN" label.
buffer.delete(0, toReduce.indexOf("Cusip/ISIN"));
// Drop the "Underwriters:" label and everything after it, so it never reaches the output.
buffer.delete(buffer.indexOf("Underwriters:"), buffer.length());
// Each entry runs from its own label up to the start of the next label.
String[] data = new String[13];
data[0] = buffer.substring(0, buffer.indexOf("Instrument Type:"));
data[1] = buffer.substring(buffer.indexOf("Instrument Type:"), buffer.indexOf("Call Type:"));
data[2] = buffer.substring(buffer.indexOf("Call Type:"), buffer.indexOf("Issue Date:"));
data[3] = buffer.substring(buffer.indexOf("Issue Date:"), buffer.indexOf("Issue Price:"));
data[4] = buffer.substring(buffer.indexOf("Issue Price:"), buffer.indexOf("Amount Issued:"));
data[5] = buffer.substring(buffer.indexOf("Amount Issued:"), buffer.indexOf("Lockout Period:"));
data[6] = buffer.substring(buffer.indexOf("Lockout Period:"), buffer.indexOf("Currency"));
data[7] = buffer.substring(buffer.indexOf("Currency"), buffer.indexOf("Denomination:"));
data[8] = buffer.substring(buffer.indexOf("Denomination:"), buffer.indexOf("First Payment"));
data[9] = buffer.substring(buffer.indexOf("First Payment"), buffer.indexOf("Maturity Date"));
data[10] = buffer.substring(buffer.indexOf("Maturity Date"), buffer.indexOf("Original Coupon:"));
data[11] = buffer.substring(buffer.indexOf("Original Coupon:"), buffer.indexOf("Current Coupon:"));
// The last entry runs to the end of what is left in the buffer.
data[12] = buffer.substring(buffer.indexOf("Current Coupon:"), buffer.length());
// One field per line, each followed by a comma.
for (String s : data) {
writer.write(s);
writer.write(",");
writer.newLine();
}
}
Fourth:
I've launched your program with the default URL:
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=3133F0GM5
It writes all the data to the file CUSIP0.csv.
Here is the output:
Cusip/ISIN:
3133F0GM5
US3133F0GM57
,
Instrument Type:
Freddie Notes
,
Call Type:
American
,
Issue Date:
10/18/2001
,
Issue Price:
100.0
,
Amount Issued:
$20,809,000
,
Lockout Period:
2 Year(s)
,
Currency:
USD
,
Denomination:
$1,000
,
First Payment:
11/15/2001
,
Maturity Date:
10/15/2011
,
Original Coupon:
5.250%
,
Current Coupon:
5.250%
,
The only truncated part is the following:
Underwriters: LASALLE FINANCIAL SERVICES, INC.
That's because you have forgotten to parse it.
Conclusion:
The following code works, but writes more data to the output file than you probably want. You should think about how to properly remove the ending.
package com.js.extract;
import java.io.*;
import java.util.*;
import javax.swing.*;
import java.awt.*;
import java.awt.event.*;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
/**
 * Swing window that collects URLs, extracts the text of each page with
 * HTML Parser's {@code StringBean}, and writes the labelled fields of every
 * page to its own {@code CUSIP<i>.csv} file in a user-chosen directory.
 *
 * <p>Usage: type the URLs into the upper text area (separated by commas
 * and/or whitespace), press "send info" to queue them, then press "start"
 * to download and save them. Progress and errors are appended to the lower
 * (read-only) text area.
 */
public class Extraction_GUI extends JFrame {

    private static final long serialVersionUID = 1L;

    /** Text shown on pages whose CUSIP details are not available yet. */
    private static final String NOT_SETTLED_MARKER =
            "*CUSIP Detail information will be available when this issue settles.";

    /** Labels that open each output field, in the order they appear on the page. */
    private static final String[] FIELD_LABELS = {
        "Cusip/ISIN",
        "Instrument Type:",
        "Call Type:",
        "Issue Date:",
        "Issue Price:",
        "Amount Issued:",
        "Lockout Period:",
        "Currency",
        "Denomination:",
        "First Payment",
        "Maturity Date",
        "Original Coupon:",
        "Current Coupon:",
        "Underwriters:"
    };

    /** Starts the harvest of all queued URLs. */
    protected JButton start;
    /** Moves the URLs typed into {@link #infoArea} into {@link #urls}. */
    protected JButton sendInfo;
    /** Editable area the user types the URLs into. */
    protected JTextArea infoArea;
    /** Read-only area that progress messages are appended to. */
    protected static JTextArea log;
    protected JScrollPane sp;
    protected JScrollPane sp2;
    protected JSplitPane pane;
    protected JPanel mainPanel;
    protected JPanel aPanel;
    protected JPanel lPanel;
    protected int areaStatus = 0;
    /** Calendar behind the logged times; refreshed by {@link #timestamp()} on every use. */
    static protected Calendar cal = Calendar.getInstance();
    /** URLs queued by the "send info" button, in the order they were entered. */
    protected static ArrayList<String> urls = new ArrayList<String>();

    /**
     * Builds and shows the window, asks the user for the save directory,
     * and wires up both buttons.
     */
    public Extraction_GUI() {
        super("Extraction by Jeel Shah");
        setSize(660, 520);
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        setLocationRelativeTo(null);
        setResizable(false);

        mainPanel = new JPanel();
        mainPanel.setLayout(new FlowLayout());
        aPanel = new JPanel();
        aPanel.setLayout(new FlowLayout());
        lPanel = new JPanel();
        lPanel.setLayout(new FlowLayout());

        start = new JButton("start");
        sendInfo = new JButton("send info");
        infoArea = new JTextArea("http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=3133F0GM5", 13, 55);
        log = new JTextArea(10, 55);
        log.setEditable(false);

        sp = new JScrollPane(infoArea);
        sp.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_ALWAYS);
        sp.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);
        sp2 = new JScrollPane(log);
        sp2.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_ALWAYS);
        sp2.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);

        aPanel.add(sp);
        lPanel.add(sp2);
        pane = new JSplitPane(JSplitPane.VERTICAL_SPLIT, aPanel, lPanel);
        mainPanel.add(pane);
        mainPanel.add(start);
        mainPanel.add(sendInfo);
        add(mainPanel);
        setVisible(true);

        final String toSave = JOptionPane.showInputDialog(null, "Please enter where you would like to save your files: ");
        log.append("Data will be saved to: " + toSave + "\n");

        sendInfo.addActionListener(new infoListener());
        start.addActionListener(new ActionListener() {
            public void actionPerformed(ActionEvent e) {
                try {
                    start(toSave);
                } catch (ParserException e1) {
                    reportFailure(e1);
                } catch (IOException e2) {
                    reportFailure(e2);
                }
            }
        });
    }

    /**
     * Downloads every queued URL and writes its fields to
     * {@code CUSIP<i>.csv} inside {@code file}, where {@code i} is the URL's
     * index in the queue. Each field is written on its own line(s), followed
     * by a comma.
     *
     * <p>Pages that only carry the "details not available" notice are logged
     * and skipped; no file is created for them.
     *
     * @param file directory the output files are created in
     * @throws ParserException if HTML Parser fails to extract the page text
     * @throws IOException if an expected label is missing from a page or an
     *         output file cannot be written
     */
    public static void start(String file) throws ParserException, IOException {
        for (int i = 0; i < urls.size(); i++) {
            String url = urls.get(i);

            StringBean sb = new StringBean();
            sb.setLinks(false);
            sb.setReplaceNonBreakingSpaces(true);
            sb.setCollapse(true);
            sb.setURL(url);
            String toReduce = sb.getStrings();

            // Nothing to split when no text came back or the page has no details yet.
            if (toReduce == null || toReduce.contains(NOT_SETTLED_MARKER)) {
                log.append("CUSIP Detail not available.For " + url + "\n");
                continue;
            }

            // Split before opening the file so a malformed page leaves no empty file behind.
            String[] data = splitFields(toReduce);

            // File(parent, child) inserts the separator, so the directory may be
            // given with or without a trailing slash.
            BufferedWriter writer = new BufferedWriter(new FileWriter(new File(file, "CUSIP" + i + ".csv")));
            try {
                for (String s : data) {
                    writer.write(s);
                    writer.write(",");
                    writer.newLine();
                }
            } finally {
                // close() also flushes; running it here releases the file even if a write fails.
                writer.close();
            }
            log.append("Harvested: " + url + " successfully" + " \n");
        }
        log.append("Completed at: " + timestamp() + "\n");
    }

    /**
     * Cuts the page text into one string per entry of {@link #FIELD_LABELS}.
     * Each piece runs from its label up to the next label; the last piece
     * runs to the end of the text. The first piece starts one character
     * before its label when such a character exists.
     *
     * @throws IOException if any label cannot be found in order
     */
    private static String[] splitFields(String text) throws IOException {
        int first = text.indexOf(FIELD_LABELS[0]);
        if (first < 0) {
            throw new IOException("Unexpected page layout: \"" + FIELD_LABELS[0] + "\" not found");
        }
        int origin = Math.max(0, first - 1);

        // bounds[k] is where field k starts; the extra last slot is the end of the text.
        int[] bounds = new int[FIELD_LABELS.length + 1];
        bounds[0] = origin;
        for (int k = 1; k < FIELD_LABELS.length; k++) {
            bounds[k] = text.indexOf(FIELD_LABELS[k], origin);
            if (bounds[k] < bounds[k - 1]) {
                throw new IOException("Unexpected page layout: \"" + FIELD_LABELS[k] + "\" not found in order");
            }
        }
        bounds[FIELD_LABELS.length] = text.length();

        String[] fields = new String[FIELD_LABELS.length];
        for (int k = 0; k < fields.length; k++) {
            fields[k] = text.substring(bounds[k], bounds[k + 1]);
        }
        return fields;
    }

    /** Returns the current time as hour:minute:second, refreshing {@link #cal} first. */
    private static String timestamp() {
        cal = Calendar.getInstance();
        return cal.get(Calendar.HOUR) + ":" + cal.get(Calendar.MINUTE) + ":" + cal.get(Calendar.SECOND);
    }

    /** Shows a failure in the on-screen log and keeps the stack trace on stderr. */
    private static void reportFailure(Exception failure) {
        log.append("Error: " + failure + "\n");
        failure.printStackTrace();
    }

    /**
     * Handles "send info": splits the text of {@link #infoArea} on commas and
     * whitespace (so URLs may also be entered one per line) and appends every
     * piece to {@link #urls}.
     */
    class infoListener implements ActionListener {
        public void actionPerformed(ActionEvent arg0) {
            StringTokenizer st = new StringTokenizer(infoArea.getText(), ", \t\r\n");
            while (st.hasMoreTokens()) {
                urls.add(st.nextToken());
            }
            log.append("Data Recieved at: " + timestamp() + "\n");
        }
    }

    /** Creates the window on the Swing event dispatch thread. */
    public static void main(String[] args) {
        javax.swing.SwingUtilities.invokeLater(new Runnable() {
            public void run() {
                new Extraction_GUI();
            }
        });
    }
}
精彩评论