import java.net.URL;
import java.net.URLConnection;
import java.net.MalformedURLException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.File;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;

/**
 * Downloads the raw text of a page addressed by an absolute URL (normally an
 * http address). Only the html text is transferred; no rendering is done.
 * The page can be saved to a file, written to standard out, or returned as a
 * string. The string form holds the whole page in memory, so it is only
 * suitable for pages that comfortably fit there.
 *
 * <p>Every public operation opens a fresh connection, transfers the page and
 * closes the connection again. If the connection cannot be opened the
 * program is terminated with exit status 1 (see {@link #getHttpStream()}).
 *
 * @version 1.0, 3 October 1996
 * @author Roger Whitney (<a href="mailto:whitney@cs.sdsu.edu">whitney@cs.sdsu.edu</a>)
 */
class HttpDownloader {

    /** Value returned by the stream read methods at end of input. */
    private static final int EOF = -1;

    /** Number of bytes moved per read/write call when transferring a page. */
    private static final int TRANSFER_BUFFER_SIZE = 4096;

    /** Initial capacity of the buffer used to build a page as a string. */
    private static final int SMALL_PAGE_SIZE = 5000;

    /**
     * Absolute address of the page to download. Here is an example of an
     * absolute address: http://www.eli.sdsu.edu/courses/index.html
     */
    private final String httpTextUrl;

    /**
     * Creates a downloader for one page. Nothing is fetched until one of
     * the output methods is called.
     *
     * @param absoluteURL absolute address of the page to download
     */
    public HttpDownloader( String absoluteURL ) {
        httpTextUrl = absoluteURL;
    }

    /**
     * Examples of using this class: one page is printed to the screen, one
     * is saved to the file "index.html", and one is fetched as a string.
     */
    public static void main( String args[] ) {
        System.out.println( "Starting to Access pages" );
        String coursesAddress = "http://www.eli.sdsu.edu/courses/index.html";
        HttpDownloader courses = new HttpDownloader( coursesAddress );
        courses.toScreen();

        System.out.println( "Now for second page" );
        String notesIndexAddress =
            "http://www.eli.sdsu.edu/courses/fall96/cs535/notes/index.html";
        HttpDownloader notesIndex = new HttpDownloader( notesIndexAddress );
        notesIndex.toFile( "index.html" );

        System.out.println( "Now for third page" );
        String finalStaticAddress =
            "http://www.eli.sdsu.edu/courses/fall96/cs535/notes/finalNotStatic/finalNotStatic.html";
        HttpDownloader finalStaticPage = new HttpDownloader( finalStaticAddress );
        System.out.println( finalStaticPage.toString( ) );
    }

    /**
     * Retrieves the web page and places the html in a string. Each byte
     * received becomes the character with the same numeric value (0-255),
     * which is equivalent to decoding the page as ISO-8859-1.
     *
     * @return a string containing the html of the web page, or the text
     *         "IO Error in reading web page" if reading the page fails
     */
    public String toString() {
        try {
            InputStream httpStream = getHttpStream();
            try {
                StringBuffer pageAsString = new StringBuffer( SMALL_PAGE_SIZE );
                byte[] chunk = new byte[ TRANSFER_BUFFER_SIZE ];
                int bytesRead;
                while ( (bytesRead = httpStream.read( chunk )) != EOF ) {
                    for ( int i = 0; i < bytesRead; i++ ) {
                        // Mask to 0-255 first: a plain (char) cast of a
                        // negative byte would sign-extend to 0xFFxx.
                        pageAsString.append( (char) ( chunk[i] & 0xFF ) );
                    }
                }
                return pageAsString.toString();
            } finally {
                closeQuietly( httpStream );
            }
        } catch ( IOException ioError ) {
            System.err.println( "IO err in reading web page into string" );
            return "IO Error in reading web page";
        }
    }

    /**
     * Retrieves the web page and displays the html on standard out.
     * A read failure is reported on standard error.
     */
    public void toScreen() {
        try {
            InputStream httpStream = getHttpStream();
            try {
                readInputToOutput( httpStream, System.out );
                // Push the page out even when it does not end in a newline.
                // System.out itself is left open for the rest of the program.
                System.out.flush();
            } finally {
                closeQuietly( httpStream );
            }
        } catch ( IOException ioError ) {
            System.err.println( "IO err in printing web page to screen" );
        }
    }

    /**
     * Retrieves the web page and places the html in a file, erasing the
     * previous contents of the file. The connection is opened before the
     * file, so an unreachable page does not wipe an existing file.
     * A failure to read the page or write the file is reported on standard
     * error.
     *
     * @param fileName name of file used to store html of the web page
     */
    public void toFile( String fileName ) {
        try {
            InputStream httpStream = getHttpStream();
            try {
                File fileForHtml = new File( fileName );
                BufferedOutputStream bufferedFile =
                    new BufferedOutputStream( new FileOutputStream( fileForHtml ) );
                try {
                    readInputToOutput( httpStream, bufferedFile );
                } finally {
                    // close() also flushes; a failure here means the file is
                    // incomplete, so it is allowed to reach the catch below.
                    bufferedFile.close();
                }
            } finally {
                closeQuietly( httpStream );
            }
        } catch ( IOException ioError ) {
            System.err.println( "IO err in saving web page to file: " + fileName );
        }
    }

    /**
     * Opens an input stream to the server holding the page indicated by
     * httpTextUrl. The caller is responsible for closing the stream.
     *
     * <p>If the address is malformed or the connection cannot be opened,
     * a message and stack trace are printed and the program exits with
     * status 1.
     *
     * @return the input stream to the web server
     */
    private InputStream getHttpStream() {
        try {
            URL http_URL = new URL( httpTextUrl );
            URLConnection httpconnection = http_URL.openConnection();
            return httpconnection.getInputStream();
        } catch ( MalformedURLException ioError ) {
            System.out.println( "Malformed URL " + httpTextUrl );
            ioError.printStackTrace();
            System.out.println( "Exiting program" );
            // Exit program, since the course has not covered exceptions
            // so students don't know how to catch exceptions yet
            System.exit( 1 );
        } catch ( IOException ioError ) {
            System.out.println( "IO Error on opening http connection" );
            ioError.printStackTrace();
            System.out.println( "Exiting program" );
            System.exit( 1 );
        }
        // Program does not reach here, compiler insists on a return
        return null;
    }

    /**
     * Transfers the entire contents of the input stream to the output
     * stream, a block at a time. Neither stream is closed or flushed here.
     *
     * @param in stream to read until end of input
     * @param out stream that receives every byte read
     * @throws IOException if reading or writing fails
     */
    private void readInputToOutput( InputStream in, OutputStream out )
            throws IOException {
        byte[] chunk = new byte[ TRANSFER_BUFFER_SIZE ];
        int bytesRead;
        while ( (bytesRead = in.read( chunk )) != EOF ) {
            out.write( chunk, 0, bytesRead );
        }
    }

    /**
     * Closes an input stream, ignoring any failure to do so. Used only
     * after reading has finished or already failed, so a close error
     * cannot lose any data.
     *
     * @param in stream to close
     */
    private static void closeQuietly( InputStream in ) {
        try {
            in.close();
        } catch ( IOException ignored ) {
            // Nothing useful can be done about a failed close of an input.
        }
    }
} // end class HttpDownloader