Package nu.validator.htmlparser.impl
Class MetaScanner
java.lang.Object
nu.validator.htmlparser.impl.MetaScanner
- Direct Known Subclasses:
MetaSniffer
-
Field Summary
Fields
Modifier and Type | Field | Description
private static final int
private static final int
private static final int
private static final int
private static final int
private static final int
private static final int
private static final int
private static final int
private String
private static final char[]
Constant for "charset".private int
The current position in recognizing the attribute name "charset".private static final int
private static final int
private static final int
private static final int
private static final int
private String
private static final char[]
Constant for "content".private static final char[]
Constant for "content-type".private int
The current position in recognizing the attribute name "content".private int
The current position in recognizing the attribute value "content-type".private static final int
private static final int
private static final char[]
Constant for "http-equiv".private static final int
private static final int
private static final int
private int
The current position in recognizing the attribute name "http-equiv".private int
private static final int
private static final int
private static final int
private int
The state of the state machine that recognizes the tag name "meta".private static final int
protected ByteReadable
The data source.private static final int
private static final int
protected int
The tokenizer state.private char[]
Accumulation buffer for attribute values.private int
The currently filled length of strBuf.private static final int
private static final int
private static final int
-
Constructor Summary
Constructors -
Method Summary
Modifier and Type | Method | Description
private void
addToBuffer
(int c) Adds a character to the accumulation buffer.private void
private void
Attempts to extract a charset name from the accumulation buffer.private void
handleCharInAttributeValue
(int c) private boolean
private boolean
protected int
read()
Reads a byte from the data source.protected final void
stateLoop
(int state) Runs the meta scanning algorithm.private int
toAsciiLowerCase
(int c) protected abstract boolean
tryCharset
(String encoding) Tries to switch to an encoding.
-
Field Details
-
CHARSET
private static final char[] CHARSETConstant for "charset". -
CONTENT
private static final char[] CONTENTConstant for "content". -
HTTP_EQUIV
private static final char[] HTTP_EQUIVConstant for "http-equiv". -
CONTENT_TYPE
private static final char[] CONTENT_TYPEConstant for "content-type". -
NO
private static final int NO- See Also:
-
M
private static final int M- See Also:
-
E
private static final int E- See Also:
-
T
private static final int T- See Also:
-
A
private static final int A- See Also:
-
DATA
private static final int DATA- See Also:
-
TAG_OPEN
private static final int TAG_OPEN- See Also:
-
SCAN_UNTIL_GT
private static final int SCAN_UNTIL_GT- See Also:
-
TAG_NAME
private static final int TAG_NAME- See Also:
-
BEFORE_ATTRIBUTE_NAME
private static final int BEFORE_ATTRIBUTE_NAME- See Also:
-
ATTRIBUTE_NAME
private static final int ATTRIBUTE_NAME- See Also:
-
AFTER_ATTRIBUTE_NAME
private static final int AFTER_ATTRIBUTE_NAME- See Also:
-
BEFORE_ATTRIBUTE_VALUE
private static final int BEFORE_ATTRIBUTE_VALUE- See Also:
-
ATTRIBUTE_VALUE_DOUBLE_QUOTED
private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED- See Also:
-
ATTRIBUTE_VALUE_SINGLE_QUOTED
private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED- See Also:
-
ATTRIBUTE_VALUE_UNQUOTED
private static final int ATTRIBUTE_VALUE_UNQUOTED- See Also:
-
AFTER_ATTRIBUTE_VALUE_QUOTED
private static final int AFTER_ATTRIBUTE_VALUE_QUOTED- See Also:
-
MARKUP_DECLARATION_OPEN
private static final int MARKUP_DECLARATION_OPEN- See Also:
-
MARKUP_DECLARATION_HYPHEN
private static final int MARKUP_DECLARATION_HYPHEN- See Also:
-
COMMENT_START
private static final int COMMENT_START- See Also:
-
COMMENT_START_DASH
private static final int COMMENT_START_DASH- See Also:
-
COMMENT
private static final int COMMENT- See Also:
-
COMMENT_END_DASH
private static final int COMMENT_END_DASH- See Also:
-
COMMENT_END
private static final int COMMENT_END- See Also:
-
SELF_CLOSING_START_TAG
private static final int SELF_CLOSING_START_TAG- See Also:
-
HTTP_EQUIV_NOT_SEEN
private static final int HTTP_EQUIV_NOT_SEEN- See Also:
-
HTTP_EQUIV_CONTENT_TYPE
private static final int HTTP_EQUIV_CONTENT_TYPE- See Also:
-
HTTP_EQUIV_OTHER
private static final int HTTP_EQUIV_OTHER- See Also:
-
readable
The data source. -
metaState
private int metaStateThe state of the state machine that recognizes the tag name "meta". -
contentIndex
private int contentIndexThe current position in recognizing the attribute name "content". -
charsetIndex
private int charsetIndexThe current position in recognizing the attribute name "charset". -
httpEquivIndex
private int httpEquivIndexThe current position in recognizing the attribute name "http-equiv". -
contentTypeIndex
private int contentTypeIndexThe current position in recognizing the attribute value "content-type". -
stateSave
protected int stateSaveThe tokenizer state. -
strBufLen
private int strBufLenThe currently filled length of strBuf. -
strBuf
private char[] strBufAccumulation buffer for attribute values. -
-
charset
-
httpEquivState
private int httpEquivState
-
-
Constructor Details
-
MetaScanner
public MetaScanner()
-
-
Method Details
-
destructor
private void destructor() -
read
Reads a byte from the data source. A value of -1 means the end of the data.- Returns:
- Throws:
IOException
-
stateLoop
Runs the meta scanning algorithm.- Throws:
SAXException
IOException
-
handleCharInAttributeValue
private void handleCharInAttributeValue(int c) -
toAsciiLowerCase
private int toAsciiLowerCase(int c) -
addToBuffer
private void addToBuffer(int c) Adds a character to the accumulation buffer.- Parameters:
c
- the character to add
-
handleAttributeValue
Attempts to extract a charset name from the accumulation buffer.- Throws:
SAXException
-
handleTag
- Throws:
SAXException
-
handleTagInner
- Throws:
SAXException
-
tryCharset
Tries to switch to an encoding.- Parameters:
encoding
-- Returns:
true
if successful- Throws:
SAXException
-