Package nu.validator.htmlparser.impl
Class MetaScanner
- java.lang.Object
-
- nu.validator.htmlparser.impl.MetaScanner
-
- Direct Known Subclasses:
MetaSniffer
public abstract class MetaScanner extends java.lang.Object
-
-
Field Summary
Fields Modifier and Type Field Description private static int
A
private static int
AFTER_ATTRIBUTE_NAME
private static int
AFTER_ATTRIBUTE_VALUE_QUOTED
private static int
ATTRIBUTE_NAME
private static int
ATTRIBUTE_VALUE_DOUBLE_QUOTED
private static int
ATTRIBUTE_VALUE_SINGLE_QUOTED
private static int
ATTRIBUTE_VALUE_UNQUOTED
private static int
BEFORE_ATTRIBUTE_NAME
private static int
BEFORE_ATTRIBUTE_VALUE
private java.lang.String
charset
private static char[]
CHARSET
Constant for "charset".private int
charsetIndex
The current position in recognizing the attribute name "charset".private static int
COMMENT
private static int
COMMENT_END
private static int
COMMENT_END_DASH
private static int
COMMENT_START
private static int
COMMENT_START_DASH
private java.lang.String
content
private static char[]
CONTENT
Constant for "content".private static char[]
CONTENT_TYPE
Constant for "content-type".private int
contentIndex
The current position in recognizing the attribute name "content".private int
contentTypeIndex
The current position in recognizing the attribute value "content-type".private static int
DATA
private static int
E
private static char[]
HTTP_EQUIV
Constant for "http-equiv".private static int
HTTP_EQUIV_CONTENT_TYPE
private static int
HTTP_EQUIV_NOT_SEEN
private static int
HTTP_EQUIV_OTHER
private int
httpEquivIndex
The current position in recognizing the attribute name "http-equiv".
private int
httpEquivState
private static int
M
private static int
MARKUP_DECLARATION_HYPHEN
private static int
MARKUP_DECLARATION_OPEN
private int
metaState
The state of the state machine that recognizes the tag name "meta".private static int
NO
protected ByteReadable
readable
The data source.private static int
SCAN_UNTIL_GT
private static int
SELF_CLOSING_START_TAG
protected int
stateSave
The tokenizer state.private char[]
strBuf
Accumulation buffer for attribute values.private int
strBufLen
The currently filled length of strBuf.private static int
T
private static int
TAG_NAME
private static int
TAG_OPEN
-
Constructor Summary
Constructors Constructor Description MetaScanner()
-
Method Summary
All Methods Instance Methods Abstract Methods Concrete Methods Modifier and Type Method Description private void
addToBuffer(int c)
Adds a character to the accumulation buffer.private void
destructor()
private void
handleAttributeValue()
Attempts to extract a charset name from the accumulation buffer.private void
handleCharInAttributeValue(int c)
private boolean
handleTag()
private boolean
handleTagInner()
protected int
read()
Reads a byte from the data source.protected void
stateLoop(int state)
Runs the meta scanning algorithm.
private int
toAsciiLowerCase(int c)
protected abstract boolean
tryCharset(java.lang.String encoding)
Tries to switch to an encoding.
-
-
-
Field Detail
-
CHARSET
private static final char[] CHARSET
Constant for "charset".
-
CONTENT
private static final char[] CONTENT
Constant for "content".
-
HTTP_EQUIV
private static final char[] HTTP_EQUIV
Constant for "http-equiv".
-
CONTENT_TYPE
private static final char[] CONTENT_TYPE
Constant for "content-type".
-
NO
private static final int NO
- See Also:
- Constant Field Values
-
M
private static final int M
- See Also:
- Constant Field Values
-
E
private static final int E
- See Also:
- Constant Field Values
-
T
private static final int T
- See Also:
- Constant Field Values
-
A
private static final int A
- See Also:
- Constant Field Values
-
DATA
private static final int DATA
- See Also:
- Constant Field Values
-
TAG_OPEN
private static final int TAG_OPEN
- See Also:
- Constant Field Values
-
SCAN_UNTIL_GT
private static final int SCAN_UNTIL_GT
- See Also:
- Constant Field Values
-
TAG_NAME
private static final int TAG_NAME
- See Also:
- Constant Field Values
-
BEFORE_ATTRIBUTE_NAME
private static final int BEFORE_ATTRIBUTE_NAME
- See Also:
- Constant Field Values
-
ATTRIBUTE_NAME
private static final int ATTRIBUTE_NAME
- See Also:
- Constant Field Values
-
AFTER_ATTRIBUTE_NAME
private static final int AFTER_ATTRIBUTE_NAME
- See Also:
- Constant Field Values
-
BEFORE_ATTRIBUTE_VALUE
private static final int BEFORE_ATTRIBUTE_VALUE
- See Also:
- Constant Field Values
-
ATTRIBUTE_VALUE_DOUBLE_QUOTED
private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED
- See Also:
- Constant Field Values
-
ATTRIBUTE_VALUE_SINGLE_QUOTED
private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED
- See Also:
- Constant Field Values
-
ATTRIBUTE_VALUE_UNQUOTED
private static final int ATTRIBUTE_VALUE_UNQUOTED
- See Also:
- Constant Field Values
-
AFTER_ATTRIBUTE_VALUE_QUOTED
private static final int AFTER_ATTRIBUTE_VALUE_QUOTED
- See Also:
- Constant Field Values
-
MARKUP_DECLARATION_OPEN
private static final int MARKUP_DECLARATION_OPEN
- See Also:
- Constant Field Values
-
MARKUP_DECLARATION_HYPHEN
private static final int MARKUP_DECLARATION_HYPHEN
- See Also:
- Constant Field Values
-
COMMENT_START
private static final int COMMENT_START
- See Also:
- Constant Field Values
-
COMMENT_START_DASH
private static final int COMMENT_START_DASH
- See Also:
- Constant Field Values
-
COMMENT
private static final int COMMENT
- See Also:
- Constant Field Values
-
COMMENT_END_DASH
private static final int COMMENT_END_DASH
- See Also:
- Constant Field Values
-
COMMENT_END
private static final int COMMENT_END
- See Also:
- Constant Field Values
-
SELF_CLOSING_START_TAG
private static final int SELF_CLOSING_START_TAG
- See Also:
- Constant Field Values
-
HTTP_EQUIV_NOT_SEEN
private static final int HTTP_EQUIV_NOT_SEEN
- See Also:
- Constant Field Values
-
HTTP_EQUIV_CONTENT_TYPE
private static final int HTTP_EQUIV_CONTENT_TYPE
- See Also:
- Constant Field Values
-
HTTP_EQUIV_OTHER
private static final int HTTP_EQUIV_OTHER
- See Also:
- Constant Field Values
-
readable
protected ByteReadable readable
The data source.
-
metaState
private int metaState
The state of the state machine that recognizes the tag name "meta".
-
contentIndex
private int contentIndex
The current position in recognizing the attribute name "content".
-
charsetIndex
private int charsetIndex
The current position in recognizing the attribute name "charset".
-
httpEquivIndex
private int httpEquivIndex
The current position in recognizing the attribute name "http-equiv".
-
contentTypeIndex
private int contentTypeIndex
The current position in recognizing the attribute value "content-type".
-
stateSave
protected int stateSave
The tokenizer state.
-
strBufLen
private int strBufLen
The currently filled length of strBuf.
-
strBuf
private char[] strBuf
Accumulation buffer for attribute values.
-
content
private java.lang.String content
-
charset
private java.lang.String charset
-
httpEquivState
private int httpEquivState
-
-
Method Detail
-
destructor
private void destructor()
-
read
protected int read() throws java.io.IOException
Reads a byte from the data source. -1 means end.
- Returns:
the byte read, or -1 at the end of the data source
- Throws:
java.io.IOException
-
stateLoop
protected final void stateLoop(int state) throws org.xml.sax.SAXException, java.io.IOException
Runs the meta scanning algorithm.
- Throws:
org.xml.sax.SAXException
java.io.IOException
-
handleCharInAttributeValue
private void handleCharInAttributeValue(int c)
-
toAsciiLowerCase
private int toAsciiLowerCase(int c)
-
addToBuffer
private void addToBuffer(int c)
Adds a character to the accumulation buffer.
- Parameters:
c - the character to add
-
handleAttributeValue
private void handleAttributeValue() throws org.xml.sax.SAXException
Attempts to extract a charset name from the accumulation buffer.
- Throws:
org.xml.sax.SAXException
-
handleTag
private boolean handleTag() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
handleTagInner
private boolean handleTagInner() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
tryCharset
protected abstract boolean tryCharset(java.lang.String encoding) throws org.xml.sax.SAXException
Tries to switch to an encoding.
- Parameters:
encoding - the encoding to try switching to
- Returns:
true if successful
- Throws:
org.xml.sax.SAXException
-
-