// From Dw0rm's Wiki
package org.blitztec.TrademeHelper;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.*;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import java.util.Properties;
import java.util.regex.*;
/**
 * Command-line entry point that downloads a listings page and runs it through
 * the Swing HTML parser ({@link ParserDelegator}) with a
 * {@code CallbackCurrentListingsStrategy} callback.
 *
 * <p>Parsing properties are read from a file named {@code parser.properties}
 * in the working directory and handed to the callback. If that file is missing
 * or unreadable the problem is reported and parsing continues with whatever
 * properties were loaded (possibly none).
 *
 * <p>Examples of other locations that have been used with this tool:
 * <pre>
 *   currently running items:
 *     file:///C:/Documents%20and%20Settings/lukasr/Desktop/blitztec.htm
 *     http://www.trademe.co.nz:80/structure/show_member_listings.asp?member=9914
 *   finished items:
 *     file:///C:/Documents%20and%20Settings/lukasr/Desktop/test/sell_sold.html
 *     file:///home/lukasr/Desktop/sell_sold.html
 *     http://www.trademe.co.nz/structure/my_trademe/sell_sold.asp
 * </pre>
 */
public class WebParser {

    /** Location fetched when no URI is supplied on the command line. */
    private static final String DEFAULT_LISTINGS_URI =
            "http://www.trademe.co.nz/structure/show_member_listings.asp?member=5658";

    /** Parsing properties loaded from {@code parser.properties}; passed to the callback. */
    static Properties props = new Properties();

    /**
     * Loads the parsing properties, resolves the page location, prints the
     * components of the resulting URL and parses the page.
     *
     * @param args optional; when {@code args[0]} is present it is used as the
     *             URI to fetch instead of the built-in default. With no
     *             arguments the behaviour is the same as before.
     */
    public static void main(String[] args) {
        loadParserProperties();

        String location = (args != null && args.length > 0) ? args[0] : DEFAULT_LISTINGS_URI;
        URL url = toUrl(location);
        if (url == null) {
            // The cause has already been reported; carrying on would only
            // end in a NullPointerException.
            return;
        }

        //URLConnection conn=url.openConnection();
        //Reader re=new InputStreamReader(conn.getInputStream());
        System.out.println(url);
        System.out.println("host:" + url.getHost());
        System.out.println("path:" + url.getPath());
        System.out.println("protocol:" + url.getProtocol());
        System.out.println("authority:" + url.getAuthority());
        System.out.println("query:" + url.getQuery());

        parseListings(url);
    }

    /**
     * Reads {@code parser.properties} into {@link #props}. Failures are
     * reported and otherwise ignored; the stream is closed even when loading fails.
     */
    private static void loadParserProperties() {
        FileInputStream fin;
        try {
            fin = new FileInputStream("parser.properties");
        } catch (FileNotFoundException exc) {
            exc.printStackTrace();
            return;
        }
        try {
            props.load(fin);
        } catch (IOException exc) {
            exc.printStackTrace();
        } finally {
            try {
                fin.close();
            } catch (IOException exc) {
                exc.printStackTrace();
            }
        }
    }

    /**
     * Converts a textual URI into a {@link URL}.
     *
     * @param location the URI text to convert
     * @return the URL, or {@code null} if the text is not a valid absolute URI
     *         with a supported protocol (the cause is printed to stderr)
     */
    private static URL toUrl(String location) {
        try {
            return new URI(location).toURL();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        } catch (IllegalArgumentException e) {
            // URI was not absolute
            e.printStackTrace();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Opens the URL, feeds its content to the HTML parser with a
     * {@code CallbackCurrentListingsStrategy} callback and prints an
     * end-of-file marker on success. The reader is always closed.
     *
     * @param url the page to download and parse; must not be {@code null}
     */
    private static void parseListings(URL url) {
        BufferedReader bufferedUrlReader = null;
        try {
            // NOTE(review): the stream is decoded with the platform default
            // charset, as before - confirm this matches the page encoding.
            bufferedUrlReader = new BufferedReader(new InputStreamReader(url.openStream()));
            //CallbackExpiredListingsStrategy callbackExpired = new CallbackExpiredListingsStrategy(props);
            //new ParserDelegator().parse(bufferedUrlReader, callbackExpired, false);
            CallbackCurrentListingsStrategy callbackCurrent = new CallbackCurrentListingsStrategy(props);
            // Third argument is ignoreCharSet = false.
            new ParserDelegator().parse(bufferedUrlReader, callbackCurrent, false);
            System.out.println("===== EOF =====");
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            if (bufferedUrlReader != null) {
                try {
                    bufferedUrlReader.close();
                } catch (IOException ioe) {
                    ioe.printStackTrace();
                }
            }
        }
    }
}