import cosc159.action.Action;
import cosc159.search.*;
import java.util.*;
import java.io.*;
import javax.swing.text.html.parser.*;
import javax.swing.text.html.*;
import javax.swing.text.*;
import java.net.*;

/**
 * This class solves exercise 3.14 from the second edition of Russell
 * and Norvig: find a path of hyperlinks leading from one web page to
 * another. Note, URLs pointing to directories must end in / for this
 * to work properly.
 *
 * @author Craig A. Struble
 * @version $Revision$
 */
public class WebPath implements Problem {
    /**
     * Class for storing state. The URL for the page containing the
     * URL in question is stored for reference in case relative URLs
     * are given. This allows us to only create URL objects for
     * visited nodes. This is important because creating URL
     * objects is very, very expensive due to security checks, etc.
     *
     * NOTE(review): this class does not override equals/hashCode, so
     * any duplicate detection performed by the search on states falls
     * back to object identity -- confirm whether the graph search
     * needs value equality here.
     */
    class WebState {
        /** The page containing the URL; null when spec is absolute. */
        public URL context;
        /** The spec of the URL to visit, possibly relative to context. */
        public String spec;
        /** Lazily created URL for this state; null until toURL() succeeds. */
        public URL asURL;

        /**
         * Return a string representation of this state.
         *
         * @return the resolved URL as a string, or "Invalid URL." when
         *         the spec cannot be turned into a URL.
         */
        public String toString() {
            try {
                return toURL().toString();
            } catch (MalformedURLException e) {
                return "Invalid URL.";
            }
        }

        /**
         * Convert the state to a URL. The URL is built on first use
         * and cached in asURL for later calls.
         *
         * @return the URL described by context and spec.
         * @throws MalformedURLException if spec cannot be parsed.
         */
        public URL toURL() throws MalformedURLException {
            if (asURL == null) {
                if (context != null) {
                    /* Resolve a possibly relative spec against its page. */
                    asURL = new URL(context, spec);
                } else {
                    asURL = new URL(spec);
                }
            }
            return asURL;
        }
    }

    /**
     * Debugging flag
     */
    private static final boolean debug = false;

    /**
     * The starting URL.
     */
    WebState source;

    /**
     * The target URL.
     */
    URL target;

    /**
     * Construct a problem instance taking two parameters. The source
     * is stored unparsed and only converted to a URL when first
     * needed; the target is parsed immediately, and the program exits
     * with status 1 if it is malformed.
     *
     * @param source The source URL
     * @param target The target URL
     */
    public WebPath(String source, String target) {
        this.source = new WebState();
        this.source.spec = source;

        try {
            this.target = new URL(target);
        } catch (MalformedURLException e) {
            System.err.println("Error creating problem: " + e);
            System.exit(1);
        }
    }

    /**
     * The starting URL for the web path problem.
     *
     * @return an object representing the initial problem state.
     */
    public Object initialState() {
        return source;
    }

    /**
     * Return URLs that can be reached from the specified URL.
     * Any failure while resolving, fetching or parsing the page is
     * treated as best effort: links found before the failure are
     * still returned, and an unreachable page simply has no (or
     * fewer) successors.
     *
     * @param state the current URL
     * @return a map of actions and resulting URLs that can be reached.
     */
    public Map successor(Object state) {
        if (debug) {
            System.out.println("Successor of " + state);
        }

        /* Sorted, duplicate-free collection of the HREF values found. */
        final Set urls = new TreeSet();
        URL context = null;

        try {
            context = ((WebState) state).toURL();
            collectLinks(context, urls);
        } catch (Exception e) {
            /* Deliberately best effort: a bad page must not abort the search. */
            if (debug) {
                System.err.println("Exception: " + e);
                e.printStackTrace();
            }
        }

        /* Create map of actions to state */
        Map map = new HashMap();
        for (Iterator i = urls.iterator(); i.hasNext(); ) {
            WebState nextState = new WebState();
            nextState.context = context;
            nextState.spec = (String) i.next();
            map.put(new GoAction(nextState), nextState);
        }

        return map;
    }

    /**
     * Fetch the given page and add the HREF value of every anchor tag
     * to the supplied set. Nothing is added when the page does not
     * report an HTML content type. The stream opened for the page is
     * always closed before this method returns.
     *
     * @param page the page to fetch and scan for links.
     * @param urls the set receiving the HREF strings found.
     * @throws IOException if the page cannot be fetched or parsed.
     */
    private void collectLinks(URL page, final Set urls) throws IOException {
        /* Open the connection to the URL, and verify it's an HTML doc */
        URLConnection connection = page.openConnection();
        connection.connect();

        /* The content type may be unknown (null); treat that as non-HTML. */
        String contentType = connection.getContentType();
        if (contentType == null || contentType.indexOf("html") == -1) {
            return;
        }

        HTMLEditorKit.ParserCallback callback =
            new HTMLEditorKit.ParserCallback() {
                public void handleStartTag(HTML.Tag tag,
                                           MutableAttributeSet attrSet,
                                           int pos) {
                    if (tag != HTML.Tag.A) {
                        return;
                    }
                    Object url = attrSet.getAttribute(HTML.Attribute.HREF);
                    /*
                     * Only get one copy of the url: add() reports
                     * whether the value was new. Non-String values are
                     * skipped because states store the spec as a String.
                     */
                    if (url instanceof String && urls.add(url)) {
                        if (debug) {
                            System.out.println("URL found: " + url);
                        }
                    }
                }
            };

        /*
         * NOTE(review): the reader uses the platform default charset,
         * not the one the server declares -- confirm this is acceptable.
         */
        Reader reader = new InputStreamReader(connection.getInputStream());
        try {
            /* Parse the HTML document */
            new ParserDelegator().parse(reader, callback, false);
        } finally {
            /* Closing the reader also closes the underlying stream. */
            reader.close();
        }
    }

    /**
     * Return whether or not the specified state is a goal state.
     * A "." is printed for every test as a progress indicator.
     *
     * NOTE(review): java.net.URL.equals is documented to resolve host
     * names and may block; it is kept because it defines which URLs
     * count as the same target.
     *
     * @param state the problem state to test as being a goal state.
     * @return true if this is a goal state and false otherwise.
     */
    public boolean isGoal(Object state) {
        System.out.print(".");
        try {
            return target.equals(((WebState) state).toURL());
        } catch (MalformedURLException e) {
            /* A state that is not a valid URL can never be the target. */
            return false;
        }
    }

    /**
     * Return the cost of taking the specified action. Every link
     * followed costs the same.
     *
     * @param state the state the action is taken from.
     * @param action the action to calculate the cost for.
     * @return a double representing the cost of taking the action.
     */
    public double stepCost(Object state, Action action) {
        return 1.0;
    }

    /**
     * Sample program. Expects two command line arguments: the source
     * URL followed by the target URL. Prints the path found by a
     * breadth first graph search, or "No path found.".
     *
     * @param argv the source URL and the target URL.
     */
    public static void main(String[] argv) {
        if (argv.length < 2) {
            System.err.println("Usage: java WebPath SOURCE_URL TARGET_URL");
            System.exit(1);
        }

        WebPath problem = new WebPath(argv[0], argv[1]);
        Search search;
        List solution;

        /* Breadth first search */
        System.out.println("Using breadth first search.");
        search = GeneralSearch.breadthFirstGraphSearch(problem);
        solution = search.execute();

        /* Terminate the line of progress dots printed by isGoal. */
        System.out.println("");
        if (solution != null) {
            for (Iterator i = solution.iterator(); i.hasNext(); ) {
                System.out.println(i.next().toString());
            }
        } else {
            System.out.println("No path found.");
        }
    }
}