de.rw7.token
Class HtmlTokenizer

java.lang.Object
  |
  +--de.rw7.token.HtmlTokenizer

public class HtmlTokenizer
extends java.lang.Object

Parses an HTML stream (given as a FastInput) into tokens. These tokens may be:

The parser tries to accept any HTML, however malformed ("dirty") it is; no exception is thrown for invalid HTML. The parser is specialized for picking out only a few interesting tags from a large amount of HTML.

Field Summary
private  TagNode clos
          The registry of the closing tags the consumer is interested in.
private  int initialMode
           
static int MODE_ABORT
           
static int MODE_IGNORE_COMMENTS
           
static int MODE_PARSE_TEXT
           
static int MODE_STRICT_ATTR
           
static int MODE_STRICT_TAGS
           
private  TagNode open
          The registry of the opening tags the consumer is interested in.
static java.lang.String PRESENT
           
 
Constructor Summary
HtmlTokenizer(int initialMode)
           
 
Method Summary
 void addClosingTag(java.lang.String name, int code)
           
 void addOpeningTag(java.lang.String name, int code, java.lang.String[] attr)
           
 void addTag(java.lang.String name, int code, java.lang.String[] attr)
           
 void printTree(java.io.PrintStream o)
           
 void read(FastInput in, HtmlConsumer t)
           
 void readComments(FastInput in, boolean ignoreComments)
           
 void removeClosingTag(java.lang.String name)
           
 void removeOpeningTag(java.lang.String name)
           
 void removeTag(java.lang.String name)
           
static void test()
           
 
Methods inherited from class java.lang.Object
<clinit>, clone, equals, finalize, getClass, hashCode, notify, notifyAll, registerNatives, toString, wait, wait, wait
 

Field Detail

PRESENT

public static final java.lang.String PRESENT

MODE_ABORT

public static final int MODE_ABORT

MODE_PARSE_TEXT

public static final int MODE_PARSE_TEXT

MODE_IGNORE_COMMENTS

public static final int MODE_IGNORE_COMMENTS

MODE_STRICT_TAGS

public static final int MODE_STRICT_TAGS

MODE_STRICT_ATTR

public static final int MODE_STRICT_ATTR

initialMode

private int initialMode

open

private TagNode open
The registry of the opening tags the consumer is interested in.

clos

private TagNode clos
The registry of the closing tags the consumer is interested in.
Constructor Detail

HtmlTokenizer

public HtmlTokenizer(int initialMode)
Method Detail

addOpeningTag

public void addOpeningTag(java.lang.String name,
                          int code,
                          java.lang.String[] attr)

addClosingTag

public void addClosingTag(java.lang.String name,
                          int code)

addTag

public void addTag(java.lang.String name,
                   int code,
                   java.lang.String[] attr)

removeOpeningTag

public void removeOpeningTag(java.lang.String name)

removeClosingTag

public void removeClosingTag(java.lang.String name)

removeTag

public void removeTag(java.lang.String name)

printTree

public void printTree(java.io.PrintStream o)

read

public void read(FastInput in,
                 HtmlConsumer t)
          throws java.io.IOException,
                 FastInput.EndException

readComments

public void readComments(FastInput in,
                         boolean ignoreComments)
                  throws java.io.IOException,
                         FastInput.EndException

test

public static final void test()