Package nu.validator.htmlparser.impl
Class Tokenizer
- java.lang.Object
-
- nu.validator.htmlparser.impl.Tokenizer
-
- All Implemented Interfaces:
org.xml.sax.Locator
- Direct Known Subclasses:
ErrorReportingTokenizer
public class Tokenizer extends java.lang.Object implements org.xml.sax.Locator
An implementation of http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
This class implements the Locator interface. This is not an incidental implementation detail: Users of this class are encouraged to make use of the Locator nature. By default, the tokenizer may report data that XML 1.0 bans. The tokenizer can be configured to treat these conditions as fatal or to coerce the infoset to something that XML 1.0 allows.
- Version:
- $Id$
-
-
Field Summary
Fields Modifier and Type Field Description private char
additional
static int
AFTER_ATTRIBUTE_NAME
static int
AFTER_ATTRIBUTE_VALUE_QUOTED
static int
AFTER_DOCTYPE_NAME
static int
AFTER_DOCTYPE_PUBLIC_IDENTIFIER
static int
AFTER_DOCTYPE_PUBLIC_KEYWORD
static int
AFTER_DOCTYPE_SYSTEM_IDENTIFIER
static int
AFTER_DOCTYPE_SYSTEM_KEYWORD
protected LocatorImpl
ampersandLocation
private char[]
astralChar
Buffer for expanding astral NCRs.static int
ATTRIBUTE_NAME
static int
ATTRIBUTE_VALUE_DOUBLE_QUOTED
static int
ATTRIBUTE_VALUE_SINGLE_QUOTED
static int
ATTRIBUTE_VALUE_UNQUOTED
protected AttributeName
attributeName
The current attribute name.private HtmlAttributes
attributes
The attribute holder.static int
BEFORE_ATTRIBUTE_NAME
static int
BEFORE_ATTRIBUTE_VALUE
static int
BEFORE_DOCTYPE_NAME
static int
BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
static int
BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
static int
BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
private char[]
bmpChar
Buffer for expanding NCRs falling into the Basic Multilingual Plane.static int
BOGUS_COMMENT
static int
BOGUS_COMMENT_HYPHEN
static int
BOGUS_DOCTYPE
private static int
BUFFER_GROW_BY
Buffer growth parameter.private int
candidate
private static char[]
CDATA_LSQB
"CDATA[" as char[]
static int
CDATA_RSQB
static int
CDATA_RSQB_RSQB
static int
CDATA_SECTION
static int
CDATA_START
static int
CHARACTER_REFERENCE_HILO_LOOKUP
static int
CHARACTER_REFERENCE_TAIL
static int
CLOSE_TAG_OPEN
static int
COMMENT
static int
COMMENT_END
static int
COMMENT_END_BANG
static int
COMMENT_END_DASH
static int
COMMENT_START
static int
COMMENT_START_DASH
private XmlViolationPolicy
commentPolicy
The policy for comments.protected boolean
confident
static int
CONSUME_CHARACTER_REFERENCE
static int
CONSUME_NCR
private XmlViolationPolicy
contentSpacePolicy
The policy for vertical tab and form feed.protected int
cstart
static int
DATA
private static int
DATA_AND_RCDATA_MASK
static int
DECIMAL_NRC_LOOP
static int
DOCTYPE
static int
DOCTYPE_NAME
static int
DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
static int
DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
static int
DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
static int
DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
static int
DOCTYPE_UBLIC
static int
DOCTYPE_YSTEM
private java.lang.String
doctypeName
The name of the current doctype token.protected EncodingDeclarationHandler
encodingDeclarationHandler
protected boolean
endTag
true
if tokenizing an end tagprotected ElementName
endTagExpectation
The element whose end tag closes the current CDATA or RCDATA element.private char[]
endTagExpectationAsArray
private int
entCol
protected org.xml.sax.ErrorHandler
errorHandler
The error handler.private int
firstCharKey
private boolean
forceQuirks
static int
HANDLE_NCR_VALUE
static int
HANDLE_NCR_VALUE_RECONSUME
static int
HEX_NCR_LOOP
private int
hi
protected boolean
html4
true
when HTML4-specific additional errors are requested.private boolean
html4ModeCompatibleWithXhtml1Schemata
private static char[]
IFRAME_ARR
protected int
index
private Interner
interner
protected boolean
lastCR
Whether the previous char read was CR.private static int
LEAD_OFFSET
Magic value for UTF-16 operations.private static char[]
LF
Array version of line feed.private int
line
private int
lo
private char[]
longStrBuf
Buffer for long strings.private int
longStrBufLen
Number of significant chars in longStrBuf.
private static char[]
LT_GT
UTF-16 code unit array containing less than and greater than for emitting those characters on certain parse errors.private static char[]
LT_SOLIDUS
UTF-16 code unit array containing less than and solidus for emitting those characters on certain parse errors.private int
mappingLangToXmlLang
static int
MARKUP_DECLARATION_HYPHEN
static int
MARKUP_DECLARATION_OCTYPE
static int
MARKUP_DECLARATION_OPEN
private boolean
metaBoundaryPassed
Whether the stream is past the first 512 bytes.private XmlViolationPolicy
namePolicy
private boolean
newAttributesEachTime
private static char[]
NOEMBED_ARR
private static char[]
NOFRAMES_ARR
static int
NON_DATA_END_TAG_NAME
private static char[]
NOSCRIPT_ARR
private static char[]
OCTYPE
"octype" as char[]
static int
PLAINTEXT
private static char[]
PLAINTEXT_ARR
private int
prevValue
static int
PROCESSING_INSTRUCTION
static int
PROCESSING_INSTRUCTION_QUESTION_MARK
private java.lang.String
publicId
The SAX public id for the resource being tokenized.private java.lang.String
publicIdentifier
The public id of the current doctype token.static int
RAWTEXT
static int
RAWTEXT_RCDATA_LESS_THAN_SIGN
static int
RCDATA
private static char[]
REPLACEMENT_CHARACTER
Array version of U+FFFD.private int
returnStateSave
private static char[]
RSQB_RSQB
UTF-16 code unit array containing ]] for emitting those characters on state transitions.private static char[]
SCRIPT_ARR
static int
SCRIPT_DATA
static int
SCRIPT_DATA_DOUBLE_ESCAPE_END
static int
SCRIPT_DATA_DOUBLE_ESCAPE_START
static int
SCRIPT_DATA_DOUBLE_ESCAPED
static int
SCRIPT_DATA_DOUBLE_ESCAPED_DASH
static int
SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH
static int
SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
static int
SCRIPT_DATA_ESCAPE_START
static int
SCRIPT_DATA_ESCAPE_START_DASH
static int
SCRIPT_DATA_ESCAPED
static int
SCRIPT_DATA_ESCAPED_DASH
static int
SCRIPT_DATA_ESCAPED_DASH_DASH
static int
SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
static int
SCRIPT_DATA_LESS_THAN_SIGN
private boolean
seenDigits
static int
SELF_CLOSING_START_TAG
private boolean
shouldSuspend
private static char[]
SPACE
Array version of space.protected int
stateSave
private char[]
strBuf
Buffer for short identifiers.private int
strBufLen
Number of significant chars in strBuf.
private int
strBufMark
private static char[]
STYLE_ARR
private java.lang.String
systemId
The SAX system id for the resource being tokenized.private java.lang.String
systemIdentifier
The system id of the current doctype token.static int
TAG_NAME
static int
TAG_OPEN
private ElementName
tagName
The current tag token name.private static char[]
TEXTAREA_ARR
private static char[]
TITLE_ARR
protected TokenHandler
tokenHandler
The token handler.private static char[]
UBLIC
"ublic" as char[]
protected int
value
private boolean
wantsComments
Whether comment tokens are emitted.private XmlViolationPolicy
xmlnsPolicy
private static char[]
XMP_ARR
private static char[]
YSTEM
"ystem" as char[]
-
Constructor Summary
Constructors Constructor Description Tokenizer(TokenHandler tokenHandler)
The constructor.Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime)
-
Method Summary
All Methods Static Methods Instance Methods Concrete Methods Modifier and Type Method Description private void
addAttributeWithoutValue()
private void
addAttributeWithValue()
private void
adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c)
private void
adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
private void
adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
private void
appendLongStrBuf(char c)
Appends to the larger buffer.private void
appendLongStrBuf(char[] buffer, int offset, int length)
private void
appendLongStrBufCarriageReturn()
private void
appendLongStrBufLineFeed()
private void
appendSecondHyphenToBogusComment()
private void
appendStrBuf(char c)
Appends to the smaller buffer.private void
appendStrBufToLongStrBuf()
Append the contents of the smaller buffer to the larger one.private void
attributeNameComplete()
void
becomeConfident()
private void
bogusDoctype()
private void
bogusDoctypeWithoutQuirks()
protected char
checkChar(char[] buf, int pos)
private void
clearLongStrBuf()
private void
clearLongStrBufAndAppend(char c)
private void
clearStrBuf()
private void
clearStrBufAndAppend(char c)
(package private) void
destructor()
private void
emitCarriageReturn(char[] buf, int pos)
private void
emitComment(int provisionalHyphens, int pos)
Emits the current comment token.private int
emitCurrentTagToken(boolean selfClosing, int pos)
private void
emitDoctypeToken(int pos)
private void
emitOrAppendOne(char[] val, int returnState)
private void
emitOrAppendStrBuf(int returnState)
private void
emitOrAppendTwo(char[] val, int returnState)
private void
emitPlaintextReplacementCharacter(char[] buf, int pos)
private void
emitReplacementCharacter(char[] buf, int pos)
private void
emitStrBuf()
Emits the smaller buffer as character tokens.(package private) HtmlAttributes
emptyAttributes()
void
end()
private void
endTagExpectationToArray()
void
eof()
void
err(java.lang.String message)
Reports a Parse Error.protected void
errAstralNonCharacter(int ch)
protected void
errAttributeValueMissing()
protected void
errBadCharAfterLt(char c)
protected void
errBadCharBeforeAttributeNameOrNull(char c)
protected void
errBogusComment()
protected void
errBogusDoctype()
protected void
errCharRefLacksSemicolon()
protected void
errConsecutiveHyphens()
protected void
errDuplicateAttribute()
protected void
errEofAfterLt()
protected void
errEofInAttributeName()
protected void
errEofInAttributeValue()
protected void
errEofInComment()
protected void
errEofInDoctype()
protected void
errEofInEndTag()
protected void
errEofInPublicId()
protected void
errEofInSystemId()
protected void
errEofInTagName()
protected void
errEofWithoutGt()
protected void
errEqualsSignBeforeAttributeName()
protected void
errExpectedPublicId()
protected void
errExpectedSystemId()
protected void
errGarbageAfterLtSlash()
protected void
errGtInPublicId()
protected void
errGtInSystemId()
protected void
errHtml4LtSlashInRcdata(char folded)
protected void
errHtml4NonNameInUnquotedAttribute(char c)
protected void
errHtml4XmlVoidSyntax()
protected void
errHyphenHyphenBang()
protected void
errLtGt()
protected void
errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
protected void
errLtSlashGt()
protected void
errMissingSpaceBeforeDoctypeName()
protected void
errNamelessDoctype()
protected void
errNcrControlChar()
protected char
errNcrControlChar(char ch)
protected void
errNcrCr()
protected void
errNcrInC1Range()
protected char
errNcrNonCharacter(char ch)
protected void
errNcrOutOfRange()
protected void
errNcrSurrogate()
protected void
errNcrUnassigned()
protected void
errNcrZero()
protected void
errNoDigitsInNCR()
protected void
errNoNamedCharacterMatch()
protected void
errNoSpaceBetweenAttributes()
protected void
errNoSpaceBetweenDoctypePublicKeywordAndQuote()
protected void
errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
protected void
errNoSpaceBetweenPublicAndSystemIds()
protected void
errNotSemicolonTerminated()
protected void
errPrematureEndOfComment()
protected void
errProcessingInstruction()
protected void
errQuoteBeforeAttributeName(char c)
protected void
errQuoteOrLtInAttributeNameOrNull(char c)
protected void
errSlashNotFollowedByGt()
void
errTreeBuilder(java.lang.String message)
protected void
errUnescapedAmpersandInterpretedAsCharacterReference()
protected void
errUnquotedAttributeValOrNull(char c)
protected void
errWarnLtSlashInRcdata()
void
fatal(java.lang.String message)
Reports a condition that would make the infoset incompatible with XML 1.0 as fatal.
protected void
flushChars(char[] buf, int pos)
Flushes coalesced character tokens.int
getCol()
Returns the col.int
getColumnNumber()
org.xml.sax.ErrorHandler
getErrorHandler()
int
getLine()
Returns the line.int
getLineNumber()
java.lang.String
getPublicId()
java.lang.String
getSystemId()
private void
handleNcrValue(int returnState)
private void
initDoctypeFields()
void
initializeWithoutStarting()
void
initLocation(java.lang.String newPublicId, java.lang.String newSystemId)
boolean
internalEncodingDeclaration(java.lang.String internalCharset)
boolean
isAlreadyComplainedAboutNonAscii()
Returns the alreadyComplainedAboutNonAscii.boolean
isInDataState()
boolean
isMappingLangToXmlLang()
Returns the mappingLangToXmlLang.boolean
isNextCharOnNewLine()
Returns the nextCharOnNewLine.boolean
isPrevCR()
void
loadState(Tokenizer other)
private java.lang.String
longStrBufToString()
The larger buffer as a string.private void
maybeAppendSpaceToBogusComment()
protected void
maybeErrAttributesOnEndTag(HtmlAttributes attrs)
protected void
maybeErrSlashInEndTag(boolean selfClosing)
protected void
maybeWarnPrivateUse(char ch)
protected void
maybeWarnPrivateUseAstral()
private static java.lang.String
newAsciiLowerCaseStringFromString(java.lang.String str)
protected void
noteAttributeWithoutValue()
protected void
noteUnquotedAttributeValue()
void
notifyAboutMetaBoundary()
void
requestSuspension()
private void
resetAttributes()
void
resetToDataState()
private void
setAdditionalAndRememberAmpersandLocation(char add)
void
setCommentPolicy(XmlViolationPolicy commentPolicy)
Sets the commentPolicy.void
setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
Sets the contentNonXmlCharPolicy.void
setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
Sets the contentSpacePolicy.void
setEncodingDeclarationHandler(EncodingDeclarationHandler encodingDeclarationHandler)
Sets the encodingDeclarationHandler.void
setErrorHandler(org.xml.sax.ErrorHandler eh)
Sets the error handler.void
setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
Sets the html4ModeCompatibleWithXhtml1Schemata.void
setInterner(Interner interner)
void
setLineNumber(int line)
For C++ use only.void
setMappingLangToXmlLang(boolean mappingLangToXmlLang)
Sets the mappingLangToXmlLang.void
setNamePolicy(XmlViolationPolicy namePolicy)
void
setStateAndEndTagExpectation(int specialTokenizerState, java.lang.String endTagExpectation)
Sets the tokenizer state and the associated element name.void
setStateAndEndTagExpectation(int specialTokenizerState, ElementName endTagExpectation)
Sets the tokenizer state and the associated element name.void
setTransitionBaseOffset(int offset)
Sets an offset to be added to the position reported to TransitionHandler.
void
setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
Sets the xmlnsPolicy.protected void
silentCarriageReturn()
protected void
silentLineFeed()
void
start()
protected void
startErrorReporting()
private int
stateLoop(int state, char c, int pos, char[] buf, boolean reconsume, int returnState, int endPos)
private void
strBufToDoctypeName()
Returns the short buffer as a local name.private void
strBufToElementNameString()
protected java.lang.String
strBufToString()
The smaller buffer as a String.boolean
tokenizeBuffer(UTF16Buffer buffer)
protected int
transition(int from, int to, boolean reconsume, int pos)
(package private) void
turnOnAdditionalHtml4Errors()
void
warn(java.lang.String message)
Reports a warning.
private long
workAroundHotSpotHugeMethodLimit(int state, char c, int pos, char[] buf, boolean reconsume, int returnState, int endPos)
Compressed returnValue: int returnState = returnValue >> 33; boolean breakOuterState = ((returnValue >> 32) & 0x1) != 0; int pos = returnValue & 0xFFFFFFFF // same as (int) returnValue
-
-
-
Field Detail
-
DATA_AND_RCDATA_MASK
private static final int DATA_AND_RCDATA_MASK
- See Also:
- Constant Field Values
-
DATA
public static final int DATA
- See Also:
- Constant Field Values
-
RCDATA
public static final int RCDATA
- See Also:
- Constant Field Values
-
SCRIPT_DATA
public static final int SCRIPT_DATA
- See Also:
- Constant Field Values
-
RAWTEXT
public static final int RAWTEXT
- See Also:
- Constant Field Values
-
SCRIPT_DATA_ESCAPED
public static final int SCRIPT_DATA_ESCAPED
- See Also:
- Constant Field Values
-
ATTRIBUTE_VALUE_DOUBLE_QUOTED
public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED
- See Also:
- Constant Field Values
-
ATTRIBUTE_VALUE_SINGLE_QUOTED
public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED
- See Also:
- Constant Field Values
-
ATTRIBUTE_VALUE_UNQUOTED
public static final int ATTRIBUTE_VALUE_UNQUOTED
- See Also:
- Constant Field Values
-
PLAINTEXT
public static final int PLAINTEXT
- See Also:
- Constant Field Values
-
TAG_OPEN
public static final int TAG_OPEN
- See Also:
- Constant Field Values
-
CLOSE_TAG_OPEN
public static final int CLOSE_TAG_OPEN
- See Also:
- Constant Field Values
-
TAG_NAME
public static final int TAG_NAME
- See Also:
- Constant Field Values
-
BEFORE_ATTRIBUTE_NAME
public static final int BEFORE_ATTRIBUTE_NAME
- See Also:
- Constant Field Values
-
ATTRIBUTE_NAME
public static final int ATTRIBUTE_NAME
- See Also:
- Constant Field Values
-
AFTER_ATTRIBUTE_NAME
public static final int AFTER_ATTRIBUTE_NAME
- See Also:
- Constant Field Values
-
BEFORE_ATTRIBUTE_VALUE
public static final int BEFORE_ATTRIBUTE_VALUE
- See Also:
- Constant Field Values
-
AFTER_ATTRIBUTE_VALUE_QUOTED
public static final int AFTER_ATTRIBUTE_VALUE_QUOTED
- See Also:
- Constant Field Values
-
BOGUS_COMMENT
public static final int BOGUS_COMMENT
- See Also:
- Constant Field Values
-
MARKUP_DECLARATION_OPEN
public static final int MARKUP_DECLARATION_OPEN
- See Also:
- Constant Field Values
-
DOCTYPE
public static final int DOCTYPE
- See Also:
- Constant Field Values
-
BEFORE_DOCTYPE_NAME
public static final int BEFORE_DOCTYPE_NAME
- See Also:
- Constant Field Values
-
DOCTYPE_NAME
public static final int DOCTYPE_NAME
- See Also:
- Constant Field Values
-
AFTER_DOCTYPE_NAME
public static final int AFTER_DOCTYPE_NAME
- See Also:
- Constant Field Values
-
BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
- See Also:
- Constant Field Values
-
DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
- See Also:
- Constant Field Values
-
DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
- See Also:
- Constant Field Values
-
AFTER_DOCTYPE_PUBLIC_IDENTIFIER
public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER
- See Also:
- Constant Field Values
-
BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
- See Also:
- Constant Field Values
-
DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
- See Also:
- Constant Field Values
-
DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
- See Also:
- Constant Field Values
-
AFTER_DOCTYPE_SYSTEM_IDENTIFIER
public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER
- See Also:
- Constant Field Values
-
BOGUS_DOCTYPE
public static final int BOGUS_DOCTYPE
- See Also:
- Constant Field Values
-
COMMENT_START
public static final int COMMENT_START
- See Also:
- Constant Field Values
-
COMMENT_START_DASH
public static final int COMMENT_START_DASH
- See Also:
- Constant Field Values
-
COMMENT
public static final int COMMENT
- See Also:
- Constant Field Values
-
COMMENT_END_DASH
public static final int COMMENT_END_DASH
- See Also:
- Constant Field Values
-
COMMENT_END
public static final int COMMENT_END
- See Also:
- Constant Field Values
-
COMMENT_END_BANG
public static final int COMMENT_END_BANG
- See Also:
- Constant Field Values
-
NON_DATA_END_TAG_NAME
public static final int NON_DATA_END_TAG_NAME
- See Also:
- Constant Field Values
-
MARKUP_DECLARATION_HYPHEN
public static final int MARKUP_DECLARATION_HYPHEN
- See Also:
- Constant Field Values
-
MARKUP_DECLARATION_OCTYPE
public static final int MARKUP_DECLARATION_OCTYPE
- See Also:
- Constant Field Values
-
DOCTYPE_UBLIC
public static final int DOCTYPE_UBLIC
- See Also:
- Constant Field Values
-
DOCTYPE_YSTEM
public static final int DOCTYPE_YSTEM
- See Also:
- Constant Field Values
-
AFTER_DOCTYPE_PUBLIC_KEYWORD
public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD
- See Also:
- Constant Field Values
-
BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
- See Also:
- Constant Field Values
-
AFTER_DOCTYPE_SYSTEM_KEYWORD
public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD
- See Also:
- Constant Field Values
-
CONSUME_CHARACTER_REFERENCE
public static final int CONSUME_CHARACTER_REFERENCE
- See Also:
- Constant Field Values
-
CONSUME_NCR
public static final int CONSUME_NCR
- See Also:
- Constant Field Values
-
CHARACTER_REFERENCE_TAIL
public static final int CHARACTER_REFERENCE_TAIL
- See Also:
- Constant Field Values
-
HEX_NCR_LOOP
public static final int HEX_NCR_LOOP
- See Also:
- Constant Field Values
-
DECIMAL_NRC_LOOP
public static final int DECIMAL_NRC_LOOP
- See Also:
- Constant Field Values
-
HANDLE_NCR_VALUE
public static final int HANDLE_NCR_VALUE
- See Also:
- Constant Field Values
-
HANDLE_NCR_VALUE_RECONSUME
public static final int HANDLE_NCR_VALUE_RECONSUME
- See Also:
- Constant Field Values
-
CHARACTER_REFERENCE_HILO_LOOKUP
public static final int CHARACTER_REFERENCE_HILO_LOOKUP
- See Also:
- Constant Field Values
-
SELF_CLOSING_START_TAG
public static final int SELF_CLOSING_START_TAG
- See Also:
- Constant Field Values
-
CDATA_START
public static final int CDATA_START
- See Also:
- Constant Field Values
-
CDATA_SECTION
public static final int CDATA_SECTION
- See Also:
- Constant Field Values
-
CDATA_RSQB
public static final int CDATA_RSQB
- See Also:
- Constant Field Values
-
CDATA_RSQB_RSQB
public static final int CDATA_RSQB_RSQB
- See Also:
- Constant Field Values
-
SCRIPT_DATA_LESS_THAN_SIGN
public static final int SCRIPT_DATA_LESS_THAN_SIGN
- See Also:
- Constant Field Values
-
SCRIPT_DATA_ESCAPE_START
public static final int SCRIPT_DATA_ESCAPE_START
- See Also:
- Constant Field Values
-
SCRIPT_DATA_ESCAPE_START_DASH
public static final int SCRIPT_DATA_ESCAPE_START_DASH
- See Also:
- Constant Field Values
-
SCRIPT_DATA_ESCAPED_DASH
public static final int SCRIPT_DATA_ESCAPED_DASH
- See Also:
- Constant Field Values
-
SCRIPT_DATA_ESCAPED_DASH_DASH
public static final int SCRIPT_DATA_ESCAPED_DASH_DASH
- See Also:
- Constant Field Values
-
BOGUS_COMMENT_HYPHEN
public static final int BOGUS_COMMENT_HYPHEN
- See Also:
- Constant Field Values
-
RAWTEXT_RCDATA_LESS_THAN_SIGN
public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN
- See Also:
- Constant Field Values
-
SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
- See Also:
- Constant Field Values
-
SCRIPT_DATA_DOUBLE_ESCAPE_START
public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START
- See Also:
- Constant Field Values
-
SCRIPT_DATA_DOUBLE_ESCAPED
public static final int SCRIPT_DATA_DOUBLE_ESCAPED
- See Also:
- Constant Field Values
-
SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
- See Also:
- Constant Field Values
-
SCRIPT_DATA_DOUBLE_ESCAPED_DASH
public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH
- See Also:
- Constant Field Values
-
SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH
public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH
- See Also:
- Constant Field Values
-
SCRIPT_DATA_DOUBLE_ESCAPE_END
public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END
- See Also:
- Constant Field Values
-
PROCESSING_INSTRUCTION
public static final int PROCESSING_INSTRUCTION
- See Also:
- Constant Field Values
-
PROCESSING_INSTRUCTION_QUESTION_MARK
public static final int PROCESSING_INSTRUCTION_QUESTION_MARK
- See Also:
- Constant Field Values
-
LEAD_OFFSET
private static final int LEAD_OFFSET
Magic value for UTF-16 operations.- See Also:
- Constant Field Values
-
LT_GT
private static final char[] LT_GT
UTF-16 code unit array containing less than and greater than for emitting those characters on certain parse errors.
-
LT_SOLIDUS
private static final char[] LT_SOLIDUS
UTF-16 code unit array containing less than and solidus for emitting those characters on certain parse errors.
-
RSQB_RSQB
private static final char[] RSQB_RSQB
UTF-16 code unit array containing ]] for emitting those characters on state transitions.
-
REPLACEMENT_CHARACTER
private static final char[] REPLACEMENT_CHARACTER
Array version of U+FFFD.
-
SPACE
private static final char[] SPACE
Array version of space.
-
LF
private static final char[] LF
Array version of line feed.
-
BUFFER_GROW_BY
private static final int BUFFER_GROW_BY
Buffer growth parameter.- See Also:
- Constant Field Values
-
CDATA_LSQB
private static final char[] CDATA_LSQB
"CDATA[" as char[]
-
OCTYPE
private static final char[] OCTYPE
"octype" as char[]
-
UBLIC
private static final char[] UBLIC
"ublic" as char[]
-
YSTEM
private static final char[] YSTEM
"ystem" as char[]
-
TITLE_ARR
private static final char[] TITLE_ARR
-
SCRIPT_ARR
private static final char[] SCRIPT_ARR
-
STYLE_ARR
private static final char[] STYLE_ARR
-
PLAINTEXT_ARR
private static final char[] PLAINTEXT_ARR
-
XMP_ARR
private static final char[] XMP_ARR
-
TEXTAREA_ARR
private static final char[] TEXTAREA_ARR
-
IFRAME_ARR
private static final char[] IFRAME_ARR
-
NOEMBED_ARR
private static final char[] NOEMBED_ARR
-
NOSCRIPT_ARR
private static final char[] NOSCRIPT_ARR
-
NOFRAMES_ARR
private static final char[] NOFRAMES_ARR
-
tokenHandler
protected final TokenHandler tokenHandler
The token handler.
-
encodingDeclarationHandler
protected EncodingDeclarationHandler encodingDeclarationHandler
-
errorHandler
protected org.xml.sax.ErrorHandler errorHandler
The error handler.
-
lastCR
protected boolean lastCR
Whether the previous char read was CR.
-
stateSave
protected int stateSave
-
returnStateSave
private int returnStateSave
-
index
protected int index
-
forceQuirks
private boolean forceQuirks
-
additional
private char additional
-
entCol
private int entCol
-
firstCharKey
private int firstCharKey
-
lo
private int lo
-
hi
private int hi
-
candidate
private int candidate
-
strBufMark
private int strBufMark
-
prevValue
private int prevValue
-
value
protected int value
-
seenDigits
private boolean seenDigits
-
cstart
protected int cstart
-
publicId
private java.lang.String publicId
The SAX public id for the resource being tokenized. (Only passed back as part of locator data.)
-
systemId
private java.lang.String systemId
The SAX system id for the resource being tokenized. (Only passed back as part of locator data.)
-
strBuf
private char[] strBuf
Buffer for short identifiers.
-
strBufLen
private int strBufLen
Number of significant chars in strBuf.
-
longStrBuf
private char[] longStrBuf
Buffer for long strings.
-
longStrBufLen
private int longStrBufLen
Number of significant chars in longStrBuf.
-
bmpChar
private final char[] bmpChar
Buffer for expanding NCRs falling into the Basic Multilingual Plane.
-
astralChar
private final char[] astralChar
Buffer for expanding astral NCRs.
-
endTagExpectation
protected ElementName endTagExpectation
The element whose end tag closes the current CDATA or RCDATA element.
-
endTagExpectationAsArray
private char[] endTagExpectationAsArray
-
endTag
protected boolean endTag
true
if tokenizing an end tag
-
tagName
private ElementName tagName
The current tag token name.
-
attributeName
protected AttributeName attributeName
The current attribute name.
-
wantsComments
private boolean wantsComments
Whether comment tokens are emitted.
-
html4
protected boolean html4
true
when HTML4-specific additional errors are requested.
-
metaBoundaryPassed
private boolean metaBoundaryPassed
Whether the stream is past the first 512 bytes.
-
doctypeName
private java.lang.String doctypeName
The name of the current doctype token.
-
publicIdentifier
private java.lang.String publicIdentifier
The public id of the current doctype token.
-
systemIdentifier
private java.lang.String systemIdentifier
The system id of the current doctype token.
-
attributes
private HtmlAttributes attributes
The attribute holder.
-
contentSpacePolicy
private XmlViolationPolicy contentSpacePolicy
The policy for vertical tab and form feed.
-
commentPolicy
private XmlViolationPolicy commentPolicy
The policy for comments.
-
xmlnsPolicy
private XmlViolationPolicy xmlnsPolicy
-
namePolicy
private XmlViolationPolicy namePolicy
-
html4ModeCompatibleWithXhtml1Schemata
private boolean html4ModeCompatibleWithXhtml1Schemata
-
newAttributesEachTime
private final boolean newAttributesEachTime
-
mappingLangToXmlLang
private int mappingLangToXmlLang
-
shouldSuspend
private boolean shouldSuspend
-
confident
protected boolean confident
-
line
private int line
-
interner
private Interner interner
-
ampersandLocation
protected LocatorImpl ampersandLocation
-
-
Constructor Detail
-
Tokenizer
public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime)
-
Tokenizer
public Tokenizer(TokenHandler tokenHandler)
The constructor.- Parameters:
tokenHandler
- the handler for receiving tokens
-
-
Method Detail
-
setInterner
public void setInterner(Interner interner)
-
initLocation
public void initLocation(java.lang.String newPublicId, java.lang.String newSystemId)
-
isMappingLangToXmlLang
public boolean isMappingLangToXmlLang()
Returns the mappingLangToXmlLang.- Returns:
- the mappingLangToXmlLang
-
setMappingLangToXmlLang
public void setMappingLangToXmlLang(boolean mappingLangToXmlLang)
Sets the mappingLangToXmlLang.- Parameters:
mappingLangToXmlLang
- the mappingLangToXmlLang to set
-
setErrorHandler
public void setErrorHandler(org.xml.sax.ErrorHandler eh)
Sets the error handler.- See Also:
XMLReader.setErrorHandler(org.xml.sax.ErrorHandler)
-
getErrorHandler
public org.xml.sax.ErrorHandler getErrorHandler()
-
setCommentPolicy
public void setCommentPolicy(XmlViolationPolicy commentPolicy)
Sets the commentPolicy.- Parameters:
commentPolicy
- the commentPolicy to set
-
setContentNonXmlCharPolicy
public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
Sets the contentNonXmlCharPolicy.- Parameters:
contentNonXmlCharPolicy
- the contentNonXmlCharPolicy to set
-
setContentSpacePolicy
public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
Sets the contentSpacePolicy.- Parameters:
contentSpacePolicy
- the contentSpacePolicy to set
-
setXmlnsPolicy
public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
Sets the xmlnsPolicy.- Parameters:
xmlnsPolicy
- the xmlnsPolicy to set
-
setNamePolicy
public void setNamePolicy(XmlViolationPolicy namePolicy)
-
setHtml4ModeCompatibleWithXhtml1Schemata
public void setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
Sets the html4ModeCompatibleWithXhtml1Schemata.- Parameters:
html4ModeCompatibleWithXhtml1Schemata
- the html4ModeCompatibleWithXhtml1Schemata to set
-
setStateAndEndTagExpectation
public void setStateAndEndTagExpectation(int specialTokenizerState, java.lang.String endTagExpectation)
Sets the tokenizer state and the associated element name. This should only ever be used to put the tokenizer into one of the states that have a special end tag expectation.
- Parameters:
specialTokenizerState
- the tokenizer state to set
endTagExpectation
- the expected end tag for transitioning back to normal
-
setStateAndEndTagExpectation
public void setStateAndEndTagExpectation(int specialTokenizerState, ElementName endTagExpectation)
Sets the tokenizer state and the associated element name. This should only ever be used to put the tokenizer into one of the states that have a special end tag expectation.
- Parameters:
specialTokenizerState
- the tokenizer state to set
endTagExpectation
- the expected end tag for transitioning back to normal
-
endTagExpectationToArray
private void endTagExpectationToArray()
-
setLineNumber
public void setLineNumber(int line)
For C++ use only.
-
getLineNumber
public int getLineNumber()
- Specified by:
getLineNumber
in interface org.xml.sax.Locator
- See Also:
Locator.getLineNumber()
-
getColumnNumber
public int getColumnNumber()
- Specified by:
getColumnNumber
in interface org.xml.sax.Locator
- See Also:
Locator.getColumnNumber()
-
getPublicId
public java.lang.String getPublicId()
- Specified by:
getPublicId
in interface org.xml.sax.Locator
- See Also:
Locator.getPublicId()
-
getSystemId
public java.lang.String getSystemId()
- Specified by:
getSystemId
in interface org.xml.sax.Locator
- See Also:
Locator.getSystemId()
-
notifyAboutMetaBoundary
public void notifyAboutMetaBoundary()
-
turnOnAdditionalHtml4Errors
void turnOnAdditionalHtml4Errors()
-
emptyAttributes
HtmlAttributes emptyAttributes()
-
clearStrBufAndAppend
private void clearStrBufAndAppend(char c)
-
clearStrBuf
private void clearStrBuf()
-
appendStrBuf
private void appendStrBuf(char c)
Appends to the smaller buffer.- Parameters:
c
- the UTF-16 code unit to append
-
strBufToString
protected java.lang.String strBufToString()
The smaller buffer as a String. Currently only used for error reporting. C++ memory note: The return value must be released.
- Returns:
- the smaller buffer as a string
-
strBufToDoctypeName
private void strBufToDoctypeName()
Returns the short buffer as a local name. The return value is released in emitDoctypeToken().
-
emitStrBuf
private void emitStrBuf() throws org.xml.sax.SAXException
Emits the smaller buffer as character tokens.- Throws:
org.xml.sax.SAXException
- if the token handler threw
-
clearLongStrBuf
private void clearLongStrBuf()
-
clearLongStrBufAndAppend
private void clearLongStrBufAndAppend(char c)
-
appendLongStrBuf
private void appendLongStrBuf(char c)
Appends to the larger buffer.- Parameters:
c
- the UTF-16 code unit to append
-
appendSecondHyphenToBogusComment
private void appendSecondHyphenToBogusComment() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
maybeAppendSpaceToBogusComment
private void maybeAppendSpaceToBogusComment() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
adjustDoubleHyphenAndAppendToLongStrBufAndErr
private void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
appendLongStrBuf
private void appendLongStrBuf(char[] buffer, int offset, int length)
-
appendStrBufToLongStrBuf
private void appendStrBufToLongStrBuf()
Append the contents of the smaller buffer to the larger one.
-
longStrBufToString
private java.lang.String longStrBufToString()
The larger buffer as a string. C++ memory note: The return value must be released.
- Returns:
- the larger buffer as a string
-
emitComment
private void emitComment(int provisionalHyphens, int pos) throws org.xml.sax.SAXException
Emits the current comment token.- Parameters:
pos
- TODO- Throws:
org.xml.sax.SAXException
-
flushChars
protected void flushChars(char[] buf, int pos) throws org.xml.sax.SAXException
Flushes coalesced character tokens.- Parameters:
buf
- TODO
pos
- TODO- Throws:
org.xml.sax.SAXException
-
fatal
public void fatal(java.lang.String message) throws org.xml.sax.SAXException
Reports a condition that would make the infoset incompatible with XML 1.0 as fatal.- Parameters:
message
- the message- Throws:
org.xml.sax.SAXException
org.xml.sax.SAXParseException
-
err
public void err(java.lang.String message) throws org.xml.sax.SAXException
Reports a Parse Error.- Parameters:
message
- the message- Throws:
org.xml.sax.SAXException
-
errTreeBuilder
public void errTreeBuilder(java.lang.String message) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
warn
public void warn(java.lang.String message) throws org.xml.sax.SAXException
Reports a warning.- Parameters:
message
- the message- Throws:
org.xml.sax.SAXException
-
resetAttributes
private void resetAttributes()
-
strBufToElementNameString
private void strBufToElementNameString()
-
emitCurrentTagToken
private int emitCurrentTagToken(boolean selfClosing, int pos) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
attributeNameComplete
private void attributeNameComplete() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
addAttributeWithoutValue
private void addAttributeWithoutValue() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
addAttributeWithValue
private void addAttributeWithValue() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
newAsciiLowerCaseStringFromString
private static java.lang.String newAsciiLowerCaseStringFromString(java.lang.String str)
-
startErrorReporting
protected void startErrorReporting() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
start
public void start() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
tokenizeBuffer
public boolean tokenizeBuffer(UTF16Buffer buffer) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
stateLoop
private int stateLoop(int state, char c, int pos, char[] buf, boolean reconsume, int returnState, int endPos) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
workAroundHotSpotHugeMethodLimit
private long workAroundHotSpotHugeMethodLimit(int state, char c, int pos, char[] buf, boolean reconsume, int returnState, int endPos) throws org.xml.sax.SAXException
Returns a compressed returnValue that packs three results into one long: int returnState = returnValue >> 33; boolean breakOuterState = ((returnValue >> 32) & 0x1) != 0; int pos = returnValue & 0xFFFFFFFF; // same as (int)returnValue
- Throws:
org.xml.sax.SAXException
-
transition
protected int transition(int from, int to, boolean reconsume, int pos) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
initDoctypeFields
private void initDoctypeFields()
-
adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn
private void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
adjustDoubleHyphenAndAppendToLongStrBufLineFeed
private void adjustDoubleHyphenAndAppendToLongStrBufLineFeed() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
appendLongStrBufLineFeed
private void appendLongStrBufLineFeed()
-
appendLongStrBufCarriageReturn
private void appendLongStrBufCarriageReturn()
-
silentCarriageReturn
protected void silentCarriageReturn()
-
silentLineFeed
protected void silentLineFeed()
-
emitCarriageReturn
private void emitCarriageReturn(char[] buf, int pos) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
emitReplacementCharacter
private void emitReplacementCharacter(char[] buf, int pos) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
emitPlaintextReplacementCharacter
private void emitPlaintextReplacementCharacter(char[] buf, int pos) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
setAdditionalAndRememberAmpersandLocation
private void setAdditionalAndRememberAmpersandLocation(char add)
-
bogusDoctype
private void bogusDoctype() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
bogusDoctypeWithoutQuirks
private void bogusDoctypeWithoutQuirks() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
emitOrAppendStrBuf
private void emitOrAppendStrBuf(int returnState) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
handleNcrValue
private void handleNcrValue(int returnState) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
eof
public void eof() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
emitDoctypeToken
private void emitDoctypeToken(int pos) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
checkChar
protected char checkChar(char[] buf, int pos) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
isAlreadyComplainedAboutNonAscii
public boolean isAlreadyComplainedAboutNonAscii()
Returns the alreadyComplainedAboutNonAscii.- Returns:
- the alreadyComplainedAboutNonAscii
-
internalEncodingDeclaration
public boolean internalEncodingDeclaration(java.lang.String internalCharset) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
emitOrAppendTwo
private void emitOrAppendTwo(char[] val, int returnState) throws org.xml.sax.SAXException
- Parameters:
val
-- Throws:
org.xml.sax.SAXException
-
emitOrAppendOne
private void emitOrAppendOne(char[] val, int returnState) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
end
public void end() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
requestSuspension
public void requestSuspension()
-
becomeConfident
public void becomeConfident()
-
isNextCharOnNewLine
public boolean isNextCharOnNewLine()
Returns the nextCharOnNewLine.- Returns:
- the nextCharOnNewLine
-
isPrevCR
public boolean isPrevCR()
-
getLine
public int getLine()
Returns the line.- Returns:
- the line
-
getCol
public int getCol()
Returns the col.- Returns:
- the col
-
isInDataState
public boolean isInDataState()
-
resetToDataState
public void resetToDataState()
-
loadState
public void loadState(Tokenizer other) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
initializeWithoutStarting
public void initializeWithoutStarting() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errGarbageAfterLtSlash
protected void errGarbageAfterLtSlash() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errLtSlashGt
protected void errLtSlashGt() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errWarnLtSlashInRcdata
protected void errWarnLtSlashInRcdata() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errHtml4LtSlashInRcdata
protected void errHtml4LtSlashInRcdata(char folded) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errCharRefLacksSemicolon
protected void errCharRefLacksSemicolon() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNoDigitsInNCR
protected void errNoDigitsInNCR() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errGtInSystemId
protected void errGtInSystemId() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errGtInPublicId
protected void errGtInPublicId() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNamelessDoctype
protected void errNamelessDoctype() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errConsecutiveHyphens
protected void errConsecutiveHyphens() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errPrematureEndOfComment
protected void errPrematureEndOfComment() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errBogusComment
protected void errBogusComment() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errUnquotedAttributeValOrNull
protected void errUnquotedAttributeValOrNull(char c) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errSlashNotFollowedByGt
protected void errSlashNotFollowedByGt() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errHtml4XmlVoidSyntax
protected void errHtml4XmlVoidSyntax() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNoSpaceBetweenAttributes
protected void errNoSpaceBetweenAttributes() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errHtml4NonNameInUnquotedAttribute
protected void errHtml4NonNameInUnquotedAttribute(char c) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errLtOrEqualsOrGraveInUnquotedAttributeOrNull
protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errAttributeValueMissing
protected void errAttributeValueMissing() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errBadCharBeforeAttributeNameOrNull
protected void errBadCharBeforeAttributeNameOrNull(char c) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEqualsSignBeforeAttributeName
protected void errEqualsSignBeforeAttributeName() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errBadCharAfterLt
protected void errBadCharAfterLt(char c) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errLtGt
protected void errLtGt() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errProcessingInstruction
protected void errProcessingInstruction() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errUnescapedAmpersandInterpretedAsCharacterReference
protected void errUnescapedAmpersandInterpretedAsCharacterReference() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNotSemicolonTerminated
protected void errNotSemicolonTerminated() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNoNamedCharacterMatch
protected void errNoNamedCharacterMatch() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errQuoteBeforeAttributeName
protected void errQuoteBeforeAttributeName(char c) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errQuoteOrLtInAttributeNameOrNull
protected void errQuoteOrLtInAttributeNameOrNull(char c) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errExpectedPublicId
protected void errExpectedPublicId() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errBogusDoctype
protected void errBogusDoctype() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
maybeWarnPrivateUseAstral
protected void maybeWarnPrivateUseAstral() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
maybeWarnPrivateUse
protected void maybeWarnPrivateUse(char ch) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
maybeErrAttributesOnEndTag
protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
maybeErrSlashInEndTag
protected void maybeErrSlashInEndTag(boolean selfClosing) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNcrNonCharacter
protected char errNcrNonCharacter(char ch) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errAstralNonCharacter
protected void errAstralNonCharacter(int ch) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNcrSurrogate
protected void errNcrSurrogate() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNcrControlChar
protected char errNcrControlChar(char ch) throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNcrCr
protected void errNcrCr() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNcrInC1Range
protected void errNcrInC1Range() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofInPublicId
protected void errEofInPublicId() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofInComment
protected void errEofInComment() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofInDoctype
protected void errEofInDoctype() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofInAttributeValue
protected void errEofInAttributeValue() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofInAttributeName
protected void errEofInAttributeName() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofWithoutGt
protected void errEofWithoutGt() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofInTagName
protected void errEofInTagName() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofInEndTag
protected void errEofInEndTag() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofAfterLt
protected void errEofAfterLt() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNcrOutOfRange
protected void errNcrOutOfRange() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNcrUnassigned
protected void errNcrUnassigned() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errDuplicateAttribute
protected void errDuplicateAttribute() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errEofInSystemId
protected void errEofInSystemId() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errExpectedSystemId
protected void errExpectedSystemId() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errMissingSpaceBeforeDoctypeName
protected void errMissingSpaceBeforeDoctypeName() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errHyphenHyphenBang
protected void errHyphenHyphenBang() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNcrControlChar
protected void errNcrControlChar() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNcrZero
protected void errNcrZero() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNoSpaceBetweenDoctypeSystemKeywordAndQuote
protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNoSpaceBetweenPublicAndSystemIds
protected void errNoSpaceBetweenPublicAndSystemIds() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
errNoSpaceBetweenDoctypePublicKeywordAndQuote
protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
noteAttributeWithoutValue
protected void noteAttributeWithoutValue() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
noteUnquotedAttributeValue
protected void noteUnquotedAttributeValue() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
setEncodingDeclarationHandler
public void setEncodingDeclarationHandler(EncodingDeclarationHandler encodingDeclarationHandler)
Sets the encodingDeclarationHandler.- Parameters:
encodingDeclarationHandler
- the encodingDeclarationHandler to set
-
destructor
void destructor()
-
setTransitionBaseOffset
public void setTransitionBaseOffset(int offset)
Sets an offset to be added to the position reported to TransitionHandler.- Parameters:
offset
- the offset
-
-