Computer Science 15-110, Spring 2010
Class Notes: More IO:  Strings, Files, and Web Pages (Optional)


  1. Other Input Sources
    1. Read from a String
    2. Read from a File
      1. Word-at-a-time (excluding whitespace)
      2. Line-at-a-time (including whitespace)
    3. (And Write to a File)
    4. Read from a Web Page

More IO:  Strings, Files, and Web Pages

  1. Other Input Sources
     
    1. Read from a String
      import java.util.Scanner;
      class MyCode {
        // Demonstrates that a Scanner can read from a String just as it reads
        // from the keyboard: each whitespace-separated word is printed on its
        // own line.
        public static void main(String[] args) {
          final String fullName = "Douglas Noel Adams";
          final Scanner words = new Scanner(fullName);
          while (words.hasNext()) {
            // next() returns the next token, using whitespace as the default delimiter
            final String word = words.next();
            System.out.println(word);
          }
        }
      }

      Another example (with a custom delimiter):

      import java.util.Scanner;
      class MyCode {
        // Demonstrates a custom delimiter: the city list is split on commas
        // (instead of whitespace) and each city is printed on its own line.
        public static void main(String[] args) {
          final String cities = "Rome,Paris,Peoria,London,Tokyo";
          // useDelimiter returns the same Scanner, so the calls can be chained
          final Scanner cityScanner = new Scanner(cities).useDelimiter(",");
          while (cityScanner.hasNext()) {
            final String city = cityScanner.next();
            System.out.println(city);
          }
        }
      }
    2. Read from a File
       
      1. Word-at-a-time (excluding whitespace)
        import java.util.Scanner;
        class MyCode  {
          // Prints each whitespace-separated word of SampleFile.txt on its own line.
          public static void main(String[] args) {
            Scanner scanner = getFileScanner("SampleFile.txt");
            if (scanner == null) {
              // getFileScanner has already printed an error message; without
              // this check the loop below would throw a NullPointerException.
              return;
            }
            while (scanner.hasNext())
              System.out.println(scanner.next());
            scanner.close();  // release the underlying file (or resource stream)
          }

          // Convenient helper method for reading from a file.
          // An absolute filename is opened directly from the file system; a
          // relative filename is loaded as a resource of the defining class.
          // Returns null (after printing a message) if the file is not found
          // (or for any other error), so callers must check for null.
          // You are responsible for using this method, but not
          // for writing it (neither on homeworks nor on tests)!
          public static Scanner getFileScanner(String filename) {
            Scanner scanner = null;
            try {
              java.io.File file = new java.io.File(filename);
              if (file.isAbsolute()) {
                scanner = new Scanner(file);
              }
              else {
                // treat relative files as resources so they work in jar files!
                // Note that we cannot use the current thread's classLoader,
                // since this does not work appropriately on Vista or on some Macs.
                java.io.InputStream is = getDefiningClass().getResourceAsStream(filename);
                if (is == null) {
                  // getResourceAsStream signals a missing resource with null;
                  // report it explicitly rather than via an accidental NPE.
                  throw new java.io.FileNotFoundException(filename);
                }
                scanner = new Scanner(new java.io.BufferedReader(
                                       new java.io.InputStreamReader(is)));
              }
            }
            catch (Exception e) {
              System.out.println("File not found: " + filename);
              return null;
            }
            return scanner;
          }

          // Return an instance of the Class class representing the
          // class in which this method is defined.
          // Throws a RuntimeException (wrapping the original failure) if the
          // class cannot be looked up.
          public static Class getDefiningClass() {
            try {
              // The first stack frame of an exception created here is this
              // method itself, so its class name is the defining class.
              StackTraceElement[] em = new Exception().getStackTrace();
              return Class.forName(em[0].getClassName());
            } catch (Exception e) {
              // keep the original exception as the cause to aid debugging
              throw new RuntimeException("Cannot find current class", e);
            }
          }
        }
      2. Line-at-a-time (including whitespace)
          // Prints SampleFile.txt one whole line at a time, preserving the
          // whitespace inside each line (and any blank lines).
          public static void main(String[] args) {
            Scanner scanner = getFileScanner("SampleFile.txt");
            if (scanner == null) {
              // getFileScanner returns null (after printing a message) on error
              return;
            }
            // Pair hasNextLine() with nextLine(): hasNext() looks for a token,
            // so it would stop early and drop trailing blank lines.
            while (scanner.hasNextLine())
              System.out.println(scanner.nextLine());
            scanner.close();
          }
    3. (And Write to a File)
      import java.io.PrintStream;
      class MyCode  {
        // Writes a single line of text to the file 'MyOutput.txt'.
        public static void main(String[] args) {
          PrintStream out = getFilePrintStream("MyOutput.txt");
          if (out == null) {
            // getFilePrintStream has already printed an error message
            return;
          }
          try {
            out.println("This will output to the file 'MyOutput.txt'");
          }
          finally {
            // always close the stream so the output is flushed and the
            // file handle is released
            out.close();
          }
        }

        // Convenient helper method for writing to a file.
        // Returns null (after printing a message) if the file cannot be
        // opened (or for any other error), so callers must check for null.
        // The caller is responsible for closing the returned stream.
        // You are responsible for using this method, but not
        // for writing it (neither on homeworks nor on tests)!
        public static PrintStream getFilePrintStream(String filename) {
          PrintStream out = null;
          try { out = new PrintStream(new java.io.File(filename)); }
          catch (Exception e) {
            System.out.println("Error opening file " + filename);
            return null;
          }
          return out;
        }
      }
    4. Read from a Web Page
       
      1. As HTML
        import java.util.Scanner;
        class MyCode  {
          // Prints the raw HTML of a web page, one line at a time.
          public static void main(String[] args) {
            String url = "http://kosbie.net/cmu/fall-08/15-100/handouts/parsely.html";
            Scanner scanner = getUrlScanner(url);
            if (scanner == null) {
              // getUrlScanner has already printed an error message
              return;
            }
            // Pair hasNextLine() with nextLine(): hasNext() looks for a token,
            // so it would stop early and drop trailing blank lines.
            while (scanner.hasNextLine())
              System.out.println(scanner.nextLine());
            scanner.close();  // also closes the underlying network stream
          }

          // Convenient helper method for reading from a web page (url).
          // Returns null (after printing a message) if the page cannot be
          // opened (or for any other error), so callers must check for null.
          // The caller is responsible for closing the returned Scanner.
          // You are responsible for using this method, but not
          // for writing it (neither on homeworks nor on tests)!
          public static Scanner getUrlScanner(String url) {
            Scanner scanner = null;
            try { scanner = new Scanner(new java.net.URL(url).openStream()); }
            catch (Exception e) {
              System.out.println("Error opening url " + url);
              return null;
            }
            return scanner;
          }
        }
      2. As Plain Text
        import java.util.Scanner;
        class MyCode  {
          // Prints the text content of a web page (with the HTML markup
          // stripped), one line at a time.
          public static void main(String[] args) {
            String url = "http://kosbie.net/cmu/fall-08/15-100/handouts/parsely.html";
            Scanner scanner = getUrlTextScanner(url);
            if (scanner == null) {
              // getUrlTextScanner has already printed an error message
              return;
            }
            // Pair hasNextLine() with nextLine(): hasNext() looks for a token,
            // so it would stop early and drop trailing blank lines.
            while (scanner.hasNextLine())
              System.out.println(scanner.nextLine());
            scanner.close();
          }

          // Convenient helper method for reading from a web page (url) as plain text.
          // Returns null (after printing a message) if the page cannot be
          // opened (or for any other error), so callers must check for null.
          // On some pages, especially if they contain XML, the spaces may be elided
          // (sothetextislikethis) -- in that case, try setting the second parameter
          // to " " or "\n", so spaces or newlines are added after each parsed element.
          // You are responsible for using this method, but not
          // for writing it (neither on homeworks nor on tests)!

          public static Scanner getUrlTextScanner(String url) { return getUrlTextScanner(url, null); }
          public static Scanner getUrlTextScanner(String url, final String dataDelimiter) {
            Scanner scanner = null;
            java.io.InputStreamReader reader = null;
            try {
              // The whole page's text is accumulated here and then scanned from memory.
              final StringBuilder sb = new StringBuilder();
              reader = new java.io.InputStreamReader(new java.net.URL(url).openStream());
              javax.swing.text.html.HTMLEditorKit.ParserCallback parser =
                new javax.swing.text.html.HTMLEditorKit.ParserCallback() {
                  // Called for each run of text: keep it, followed by the
                  // optional delimiter.
                  @Override
                  public void handleText(char[] data, int pos) {
                    if (data != null) {
                      sb.append(data);
                      if (dataDelimiter != null) sb.append(dataDelimiter);
                    }
                  }
                  // Called for simple tags: a flow-breaking tag becomes a newline.
                  @Override
                  public void handleSimpleTag(javax.swing.text.html.HTML.Tag tag,
                                    javax.swing.text.MutableAttributeSet a,
                                    int pos) {
                    if (tag.breaksFlow()) sb.append("\n");
                  }
                };
              new javax.swing.text.html.parser.ParserDelegator().parse(reader, parser, true);
              scanner = new Scanner(sb.toString());
            }
            catch (Exception e) {
              System.out.println("Error opening text reader for url: " + url);
              return null;
            }
            finally {
              // The text is fully buffered by now, so the network stream can
              // (and should) be released whether or not parsing succeeded.
              if (reader != null) {
                try { reader.close(); }
                catch (java.io.IOException ignored) {
                  // nothing useful can be done if closing fails
                }
              }
            }
            return scanner;
          }
        }

carpe diem   -   carpe diem   -   carpe diem   -   carpe diem   -   carpe diem   -   carpe diem   -   carpe diem   -   carpe diem   -   carpe diem