Writing your own Tomcat Valve

You can extend org.apache.catalina.valves.ValveBase and implement the invoke method. The following example just prints a message when it is called and then invokes the next Valve.

MyValve.java:

import javax.servlet.http.*;
import javax.servlet.*;
import java.util.*;
import java.io.*;
import org.apache.catalina.valves.*;
import org.apache.catalina.*;
 
/**
 * Minimal example Valve: announces on standard output that it was reached
 * for an HTTP request/response pair, then hands the request on to the next
 * Valve in the pipeline.
 */
public class MyValve extends ValveBase
{
   /**
    * The descriptive information related to this implementation.
    */
   private static final String info = "MyValve/1.0";

   /**
    * Return descriptive information about this Valve implementation.
    */
   public String getInfo() {
      return info;
   }

   /**
    * Print a marker line for HTTP traffic and continue the pipeline.
    * Non-HTTP requests or responses are passed on silently.
    *
    * @param request  the request being processed
    * @param response the response being generated
    * @param context  used to invoke the next Valve
    * @throws IOException      declared for, and raised by, downstream processing
    * @throws ServletException declared for, and raised by, downstream processing
    */
   public void invoke(Request request, Response response, ValveContext context)
                             throws IOException, ServletException {
      // Only announce ourselves when both sides of the exchange are HTTP
      if ((request instanceof HttpRequest) &&
          (response instanceof HttpResponse)) {
         // Blank lines around the message make it easy to spot in the console
         System.out.println("\n\n\nMyValve invoked\n\n\n");
      }

      // continue processing the request in every case
      context.invokeNext(request, response);
   }

   /**
    * Return a short textual form: "MyValve[" followed by the container
    * (when one is set) and a closing "]".
    */
   public String toString() {
      // 'container' is not declared here; presumably inherited from ValveBase
      String inner = (container != null) ? String.valueOf(container) : "";
      return "MyValve[" + inner + "]";
   }
}

Compile it with catalina.jar and servlet.jar on your classpath, then place the compiled class in /server/classes.

Retrieving the links in an HTML document

LinkExtractor.java:

package htmltools;
 
import java.net.URL;
import java.net.InetAddress;
import java.net.MalformedURLException;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;

import java.util.Collection;
import java.util.ArrayList;

import javax.swing.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.*;

/**
 * This class takes a URL and, if it is valid, extracts all the external 
 * and local links and stores them in distinct ArrayLists.
 * It provides accessors to the two lists.
 */
/**
 * Downloads the HTML document behind a URL and collects the distinct
 * href values found on its start tags. Links that start with "http" are
 * recorded as external; every other href is recorded as local.
 * The document is fetched and parsed once, inside the constructor; the two
 * resulting lists are then available through the accessors.
 */
public class LinkExtractor
{
    private URL m_zURL = null;
    private CallbackHandler m_zHandler;

    /**
     * Initialize the URL and parse the document it points to.
     * You can provide URLs in the following form:<br>
     * <font color="blue">
     * http://www.something.ext<br>
     * www.something.ext<br>
     * something.ext<br>
     * </font>
     * In the last case the extractor assumes the document is on the local
     * host and tries to open it there.
     *
     * @param sURL the address to read, in one of the forms above
     * @throws MalformedURLException if the resulting address is not a valid URL
     * @throws IOException if the local host name cannot be resolved or the
     *         document cannot be opened or read
     */
    public LinkExtractor(String sURL) throws MalformedURLException, IOException {
        /* End-users don't like typing http, so
           we'll give them a hand */
        if (sURL.startsWith("www.")) {
            sURL = "http://" + sURL;
        } else if (!sURL.startsWith("http")) {
            /* There is no http protocol specified and the address does
               not start with www., so we try to find this document on the
               local host.
               Of course, this behavior does not cover all cases.
               For example the user may try an ftp protocol,
               or, accustomed to modern day browsers, omit www altogether.
            */
            InetAddress zAddr = InetAddress.getLocalHost();
            sURL = "http://" + zAddr.getHostName() + "/" + sURL;
        }

        m_zURL = new URL(sURL);
        m_zHandler = new CallbackHandler();
        parse();
    }

    /**
     * Return the list of all external links (hrefs starting with "http").
     * The returned collection is the extractor's own list, not a copy.
     */
    public Collection getExternalLinks() {
        return m_zHandler.m_clExternalLinks;
    }

    /**
     * Return the list of all local links (hrefs not starting with "http").
     * The returned collection is the extractor's own list, not a copy.
     */
    public Collection getLocalLinks() {
        return m_zHandler.m_clLocalLinks;
    }

    /**
     * Open the URL, feed its content to the HTML parser and always release
     * the underlying stream, even when parsing fails.
     *
     * @throws IOException if the document cannot be opened or read
     */
    private void parse() throws IOException {
        // establish connection to site
        BufferedReader zReader = new BufferedReader(
            new InputStreamReader(m_zURL.openStream()));
        try {
            // parse it to get the links
            new ParserDelegator().parse(zReader, m_zHandler, true);
        } finally {
            // close in finally so a parser failure does not leak the connection
            zReader.close();
        }
    }

    /**
     * Parser callback that records href values as start tags are reported.
     * Static because it needs no access to the enclosing instance.
     */
    private static class CallbackHandler extends HTMLEditorKit.ParserCallback
    {
        final ArrayList m_clExternalLinks = new ArrayList();
        final ArrayList m_clLocalLinks = new ArrayList();

        /**
         * Invoked when a start tag is encountered. For A and ADDRESS tags
         * the href attribute, if present, is stored once in the matching list.
         */
        public void handleStartTag(HTML.Tag zTag,
                                   MutableAttributeSet zAttributes,
                                   int iPosition) {
            // NOTE(review): ADDRESS elements do not normally carry an href;
            // possibly AREA or LINK was intended - confirm before changing.
            if (!zTag.equals(HTML.Tag.A) && !zTag.equals(HTML.Tag.ADDRESS)) {
                return;
            }

            String sLink = (String) zAttributes.getAttribute(HTML.Attribute.HREF);
            if (null == sLink) {
                return;
            }

            ArrayList clTarget = sLink.startsWith("http")
                ? m_clExternalLinks
                : m_clLocalLinks;
            // keep each link only once
            if (!clTarget.contains(sLink)) {
                clTarget.add(sLink);
            }
        }
    }

    /**
     * The main method is provided only for testing. It prints the external
     * links followed by the local links of the URL given as first argument.
     * It must be public, otherwise the java launcher refuses to run it.
     */
    public static void main(String[] asArgs) throws Exception {
        if (asArgs.length < 1) {
            System.out.println("Usage: java htmltools.LinkExtractor <URL>");
            // non-zero status: the program was invoked incorrectly
            System.exit(1);
        }
        String sURL = asArgs[0];

        LinkExtractor gl = new LinkExtractor(sURL);

        Object[] aoExternal = gl.getExternalLinks().toArray();
        for (int i = 0; i < aoExternal.length; i++) {
            System.out.println(aoExternal[i]);
        }

        Object[] aoLocal = gl.getLocalLinks().toArray();
        for (int i = 0; i < aoLocal.length; i++) {
            System.out.println(aoLocal[i]);
        }
    }
}