public class Tokenizer
extends java.lang.Object
implements org.xml.sax.Locator
This class implements the Locator
interface. This is not an
incidental implementation detail: users of this class are encouraged to make
use of its Locator
nature.
By default, the tokenizer may report data that XML 1.0 bans. The tokenizer
can be configured to treat these conditions as fatal or to coerce the infoset
to something that XML 1.0 allows.

Modifier and Type | Field and Description |
---|---|
private char |
additional |
static int |
AFTER_ATTRIBUTE_NAME |
static int |
AFTER_ATTRIBUTE_VALUE_QUOTED |
static int |
AFTER_DOCTYPE_NAME |
static int |
AFTER_DOCTYPE_PUBLIC_IDENTIFIER |
static int |
AFTER_DOCTYPE_PUBLIC_KEYWORD |
static int |
AFTER_DOCTYPE_SYSTEM_IDENTIFIER |
static int |
AFTER_DOCTYPE_SYSTEM_KEYWORD |
protected LocatorImpl |
ampersandLocation |
private char[] |
astralChar
Buffer for expanding astral NCRs.
|
static int |
ATTRIBUTE_NAME |
static int |
ATTRIBUTE_VALUE_DOUBLE_QUOTED |
static int |
ATTRIBUTE_VALUE_SINGLE_QUOTED |
static int |
ATTRIBUTE_VALUE_UNQUOTED |
protected AttributeName |
attributeName
The current attribute name.
|
private HtmlAttributes |
attributes
The attribute holder.
|
static int |
BEFORE_ATTRIBUTE_NAME |
static int |
BEFORE_ATTRIBUTE_VALUE |
static int |
BEFORE_DOCTYPE_NAME |
static int |
BEFORE_DOCTYPE_PUBLIC_IDENTIFIER |
static int |
BEFORE_DOCTYPE_SYSTEM_IDENTIFIER |
static int |
BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS |
private char[] |
bmpChar
Buffer for expanding NCRs falling into the Basic Multilingual Plane.
|
static int |
BOGUS_COMMENT |
static int |
BOGUS_COMMENT_HYPHEN |
static int |
BOGUS_DOCTYPE |
private static int |
BUFFER_GROW_BY
Buffer growth parameter.
|
private int |
candidate |
private static char[] |
CDATA_LSQB
"CDATA[" as
char[] |
static int |
CDATA_RSQB |
static int |
CDATA_RSQB_RSQB |
static int |
CDATA_SECTION |
static int |
CDATA_START |
static int |
CHARACTER_REFERENCE_HILO_LOOKUP |
static int |
CHARACTER_REFERENCE_TAIL |
static int |
CLOSE_TAG_OPEN |
static int |
COMMENT |
static int |
COMMENT_END |
static int |
COMMENT_END_BANG |
static int |
COMMENT_END_DASH |
static int |
COMMENT_START |
static int |
COMMENT_START_DASH |
private XmlViolationPolicy |
commentPolicy
The policy for comments.
|
protected boolean |
confident |
static int |
CONSUME_CHARACTER_REFERENCE |
static int |
CONSUME_NCR |
private XmlViolationPolicy |
contentSpacePolicy
The policy for vertical tab and form feed.
|
protected int |
cstart |
static int |
DATA |
private static int |
DATA_AND_RCDATA_MASK |
static int |
DECIMAL_NRC_LOOP |
static int |
DOCTYPE |
static int |
DOCTYPE_NAME |
static int |
DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED |
static int |
DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED |
static int |
DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED |
static int |
DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED |
static int |
DOCTYPE_UBLIC |
static int |
DOCTYPE_YSTEM |
private java.lang.String |
doctypeName
The name of the current doctype token.
|
protected EncodingDeclarationHandler |
encodingDeclarationHandler |
protected boolean |
endTag
true if tokenizing an end tag |
protected ElementName |
endTagExpectation
The element whose end tag closes the current CDATA or RCDATA element.
|
private char[] |
endTagExpectationAsArray |
private int |
entCol |
protected org.xml.sax.ErrorHandler |
errorHandler
The error handler.
|
private int |
firstCharKey |
private boolean |
forceQuirks |
static int |
HANDLE_NCR_VALUE |
static int |
HANDLE_NCR_VALUE_RECONSUME |
static int |
HEX_NCR_LOOP |
private int |
hi |
protected boolean |
html4
true when HTML4-specific additional errors are requested. |
private boolean |
html4ModeCompatibleWithXhtml1Schemata |
private static char[] |
IFRAME_ARR |
protected int |
index |
private Interner |
interner |
protected boolean |
lastCR
Whether the previous char read was CR.
|
private static int |
LEAD_OFFSET
Magic value for UTF-16 operations.
|
private static char[] |
LF
Array version of line feed.
|
private int |
line |
private int |
lo |
private char[] |
longStrBuf
Buffer for long strings.
|
private int |
longStrBufLen
Number of significant
`char`s in `longStrBuf`. |
private static char[] |
LT_GT
UTF-16 code unit array containing less than and greater than for emitting
those characters on certain parse errors.
|
private static char[] |
LT_SOLIDUS
UTF-16 code unit array containing less than and solidus for emitting
those characters on certain parse errors.
|
private int |
mappingLangToXmlLang |
static int |
MARKUP_DECLARATION_HYPHEN |
static int |
MARKUP_DECLARATION_OCTYPE |
static int |
MARKUP_DECLARATION_OPEN |
private boolean |
metaBoundaryPassed
Whether the stream is past the first 512 bytes.
|
private XmlViolationPolicy |
namePolicy |
private boolean |
newAttributesEachTime |
private static char[] |
NOEMBED_ARR |
private static char[] |
NOFRAMES_ARR |
static int |
NON_DATA_END_TAG_NAME |
private static char[] |
NOSCRIPT_ARR |
private static char[] |
OCTYPE
"octype" as
char[] |
static int |
PLAINTEXT |
private static char[] |
PLAINTEXT_ARR |
private int |
prevValue |
static int |
PROCESSING_INSTRUCTION |
static int |
PROCESSING_INSTRUCTION_QUESTION_MARK |
private java.lang.String |
publicId
The SAX public id for the resource being tokenized.
|
private java.lang.String |
publicIdentifier
The public id of the current doctype token.
|
static int |
RAWTEXT |
static int |
RAWTEXT_RCDATA_LESS_THAN_SIGN |
static int |
RCDATA |
private static char[] |
REPLACEMENT_CHARACTER
Array version of U+FFFD.
|
private int |
returnStateSave |
private static char[] |
RSQB_RSQB
UTF-16 code unit array containing ]] for emitting those characters on
state transitions.
|
private static char[] |
SCRIPT_ARR |
static int |
SCRIPT_DATA |
static int |
SCRIPT_DATA_DOUBLE_ESCAPE_END |
static int |
SCRIPT_DATA_DOUBLE_ESCAPE_START |
static int |
SCRIPT_DATA_DOUBLE_ESCAPED |
static int |
SCRIPT_DATA_DOUBLE_ESCAPED_DASH |
static int |
SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH |
static int |
SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN |
static int |
SCRIPT_DATA_ESCAPE_START |
static int |
SCRIPT_DATA_ESCAPE_START_DASH |
static int |
SCRIPT_DATA_ESCAPED |
static int |
SCRIPT_DATA_ESCAPED_DASH |
static int |
SCRIPT_DATA_ESCAPED_DASH_DASH |
static int |
SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN |
static int |
SCRIPT_DATA_LESS_THAN_SIGN |
private boolean |
seenDigits |
static int |
SELF_CLOSING_START_TAG |
private boolean |
shouldSuspend |
private static char[] |
SPACE
Array version of space.
|
protected int |
stateSave |
private char[] |
strBuf
Buffer for short identifiers.
|
private int |
strBufLen
Number of significant
`char`s in `strBuf`. |
private int |
strBufMark |
private static char[] |
STYLE_ARR |
private java.lang.String |
systemId
The SAX system id for the resource being tokenized.
|
private java.lang.String |
systemIdentifier
The system id of the current doctype token.
|
static int |
TAG_NAME |
static int |
TAG_OPEN |
private ElementName |
tagName
The current tag token name.
|
private static char[] |
TEXTAREA_ARR |
private static char[] |
TITLE_ARR |
protected TokenHandler |
tokenHandler
The token handler.
|
private static char[] |
UBLIC
"ublic" as
char[] |
protected int |
value |
private boolean |
wantsComments
Whether comment tokens are emitted.
|
private XmlViolationPolicy |
xmlnsPolicy |
private static char[] |
XMP_ARR |
private static char[] |
YSTEM
"ystem" as
char[] |
Constructor and Description |
---|
Tokenizer(TokenHandler tokenHandler)
The constructor.
|
Tokenizer(TokenHandler tokenHandler,
boolean newAttributesEachTime) |
Modifier and Type | Method and Description |
---|---|
private void |
addAttributeWithoutValue() |
private void |
addAttributeWithValue() |
private void |
adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c) |
private void |
adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() |
private void |
adjustDoubleHyphenAndAppendToLongStrBufLineFeed() |
private void |
appendLongStrBuf(char c)
Appends to the larger buffer.
|
private void |
appendLongStrBuf(char[] buffer,
int offset,
int length) |
private void |
appendLongStrBufCarriageReturn() |
private void |
appendLongStrBufLineFeed() |
private void |
appendSecondHyphenToBogusComment() |
private void |
appendStrBuf(char c)
Appends to the smaller buffer.
|
private void |
appendStrBufToLongStrBuf()
Append the contents of the smaller buffer to the larger one.
|
private void |
attributeNameComplete() |
void |
becomeConfident() |
private void |
bogusDoctype() |
private void |
bogusDoctypeWithoutQuirks() |
protected char |
checkChar(char[] buf,
int pos) |
private void |
clearLongStrBuf() |
private void |
clearLongStrBufAndAppend(char c) |
private void |
clearStrBuf() |
private void |
clearStrBufAndAppend(char c) |
(package private) void |
destructor() |
private void |
emitCarriageReturn(char[] buf,
int pos) |
private void |
emitComment(int provisionalHyphens,
int pos)
Emits the current comment token.
|
private int |
emitCurrentTagToken(boolean selfClosing,
int pos) |
private void |
emitDoctypeToken(int pos) |
private void |
emitOrAppendOne(char[] val,
int returnState) |
private void |
emitOrAppendStrBuf(int returnState) |
private void |
emitOrAppendTwo(char[] val,
int returnState) |
private void |
emitPlaintextReplacementCharacter(char[] buf,
int pos) |
private void |
emitReplacementCharacter(char[] buf,
int pos) |
private void |
emitStrBuf()
Emits the smaller buffer as character tokens.
|
(package private) HtmlAttributes |
emptyAttributes() |
void |
end() |
private void |
endTagExpectationToArray() |
void |
eof() |
void |
err(java.lang.String message)
Reports a Parse Error.
|
protected void |
errAstralNonCharacter(int ch) |
protected void |
errAttributeValueMissing() |
protected void |
errBadCharAfterLt(char c) |
protected void |
errBadCharBeforeAttributeNameOrNull(char c) |
protected void |
errBogusComment() |
protected void |
errBogusDoctype() |
protected void |
errCharRefLacksSemicolon() |
protected void |
errConsecutiveHyphens() |
protected void |
errDuplicateAttribute() |
protected void |
errEofAfterLt() |
protected void |
errEofInAttributeName() |
protected void |
errEofInAttributeValue() |
protected void |
errEofInComment() |
protected void |
errEofInDoctype() |
protected void |
errEofInEndTag() |
protected void |
errEofInPublicId() |
protected void |
errEofInSystemId() |
protected void |
errEofInTagName() |
protected void |
errEofWithoutGt() |
protected void |
errEqualsSignBeforeAttributeName() |
protected void |
errExpectedPublicId() |
protected void |
errExpectedSystemId() |
protected void |
errGarbageAfterLtSlash() |
protected void |
errGtInPublicId() |
protected void |
errGtInSystemId() |
protected void |
errHtml4LtSlashInRcdata(char folded) |
protected void |
errHtml4NonNameInUnquotedAttribute(char c) |
protected void |
errHtml4XmlVoidSyntax() |
protected void |
errHyphenHyphenBang() |
protected void |
errLtGt() |
protected void |
errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c) |
protected void |
errLtSlashGt() |
protected void |
errMissingSpaceBeforeDoctypeName() |
protected void |
errNamelessDoctype() |
protected void |
errNcrControlChar() |
protected char |
errNcrControlChar(char ch) |
protected void |
errNcrCr() |
protected void |
errNcrInC1Range() |
protected char |
errNcrNonCharacter(char ch) |
protected void |
errNcrOutOfRange() |
protected void |
errNcrSurrogate() |
protected void |
errNcrUnassigned() |
protected void |
errNcrZero() |
protected void |
errNoDigitsInNCR() |
protected void |
errNoNamedCharacterMatch() |
protected void |
errNoSpaceBetweenAttributes() |
protected void |
errNoSpaceBetweenDoctypePublicKeywordAndQuote() |
protected void |
errNoSpaceBetweenDoctypeSystemKeywordAndQuote() |
protected void |
errNoSpaceBetweenPublicAndSystemIds() |
protected void |
errNotSemicolonTerminated() |
protected void |
errPrematureEndOfComment() |
protected void |
errProcessingInstruction() |
protected void |
errQuoteBeforeAttributeName(char c) |
protected void |
errQuoteOrLtInAttributeNameOrNull(char c) |
protected void |
errSlashNotFollowedByGt() |
void |
errTreeBuilder(java.lang.String message) |
protected void |
errUnescapedAmpersandInterpretedAsCharacterReference() |
protected void |
errUnquotedAttributeValOrNull(char c) |
protected void |
errWarnLtSlashInRcdata() |
void |
fatal(java.lang.String message)
Reports a condition that would make the infoset incompatible with XML
1.0 as fatal.
|
protected void |
flushChars(char[] buf,
int pos)
Flushes coalesced character tokens.
|
int |
getCol()
Returns the col.
|
int |
getColumnNumber() |
org.xml.sax.ErrorHandler |
getErrorHandler() |
int |
getLine()
Returns the line.
|
int |
getLineNumber() |
java.lang.String |
getPublicId() |
java.lang.String |
getSystemId() |
private void |
handleNcrValue(int returnState) |
private void |
initDoctypeFields() |
void |
initializeWithoutStarting() |
void |
initLocation(java.lang.String newPublicId,
java.lang.String newSystemId) |
boolean |
internalEncodingDeclaration(java.lang.String internalCharset) |
boolean |
isAlreadyComplainedAboutNonAscii()
Returns the alreadyComplainedAboutNonAscii.
|
boolean |
isInDataState() |
boolean |
isMappingLangToXmlLang()
Returns the mappingLangToXmlLang.
|
boolean |
isNextCharOnNewLine()
Returns the nextCharOnNewLine.
|
boolean |
isPrevCR() |
void |
loadState(Tokenizer other) |
private java.lang.String |
longStrBufToString()
The larger buffer as a string.
|
private void |
maybeAppendSpaceToBogusComment() |
protected void |
maybeErrAttributesOnEndTag(HtmlAttributes attrs) |
protected void |
maybeErrSlashInEndTag(boolean selfClosing) |
protected void |
maybeWarnPrivateUse(char ch) |
protected void |
maybeWarnPrivateUseAstral() |
private static java.lang.String |
newAsciiLowerCaseStringFromString(java.lang.String str) |
protected void |
noteAttributeWithoutValue() |
protected void |
noteUnquotedAttributeValue() |
void |
notifyAboutMetaBoundary() |
void |
requestSuspension() |
private void |
resetAttributes() |
void |
resetToDataState() |
private void |
setAdditionalAndRememberAmpersandLocation(char add) |
void |
setCommentPolicy(XmlViolationPolicy commentPolicy)
Sets the commentPolicy.
|
void |
setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
Sets the contentNonXmlCharPolicy.
|
void |
setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
Sets the contentSpacePolicy.
|
void |
setEncodingDeclarationHandler(EncodingDeclarationHandler encodingDeclarationHandler)
Sets the encodingDeclarationHandler.
|
void |
setErrorHandler(org.xml.sax.ErrorHandler eh)
Sets the error handler.
|
void |
setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
Sets the html4ModeCompatibleWithXhtml1Schemata.
|
void |
setInterner(Interner interner) |
void |
setLineNumber(int line)
For C++ use only.
|
void |
setMappingLangToXmlLang(boolean mappingLangToXmlLang)
Sets the mappingLangToXmlLang.
|
void |
setNamePolicy(XmlViolationPolicy namePolicy) |
void |
setStateAndEndTagExpectation(int specialTokenizerState,
ElementName endTagExpectation)
Sets the tokenizer state and the associated element name.
|
void |
setStateAndEndTagExpectation(int specialTokenizerState,
java.lang.String endTagExpectation)
Sets the tokenizer state and the associated element name.
|
void |
setTransitionBaseOffset(int offset)
Sets an offset to be added to the position reported to
`TransitionHandler`. |
void |
setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
Sets the xmlnsPolicy.
|
protected void |
silentCarriageReturn() |
protected void |
silentLineFeed() |
void |
start() |
protected void |
startErrorReporting() |
private int |
stateLoop(int state,
char c,
int pos,
char[] buf,
boolean reconsume,
int returnState,
int endPos) |
private void |
strBufToDoctypeName()
Returns the short buffer as a local name.
|
private void |
strBufToElementNameString() |
protected java.lang.String |
strBufToString()
The smaller buffer as a String.
|
boolean |
tokenizeBuffer(UTF16Buffer buffer) |
protected int |
transition(int from,
int to,
boolean reconsume,
int pos) |
(package private) void |
turnOnAdditionalHtml4Errors() |
void |
warn(java.lang.String message)
Reports a warning
|
private long |
workAroundHotSpotHugeMethodLimit(int state,
char c,
int pos,
char[] buf,
boolean reconsume,
int returnState,
int endPos)
Layout of the compressed return value (returnValue):
int returnState = returnValue >> 33
boolean breakOuterState = ((returnValue >> 32) & 0x1) != 0
int pos = returnValue & 0xFFFFFFFF // same as (int)returnValue
|
private static final int DATA_AND_RCDATA_MASK
public static final int DATA
public static final int RCDATA
public static final int SCRIPT_DATA
public static final int RAWTEXT
public static final int SCRIPT_DATA_ESCAPED
public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED
public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED
public static final int ATTRIBUTE_VALUE_UNQUOTED
public static final int PLAINTEXT
public static final int TAG_OPEN
public static final int CLOSE_TAG_OPEN
public static final int TAG_NAME
public static final int BEFORE_ATTRIBUTE_NAME
public static final int ATTRIBUTE_NAME
public static final int AFTER_ATTRIBUTE_NAME
public static final int BEFORE_ATTRIBUTE_VALUE
public static final int AFTER_ATTRIBUTE_VALUE_QUOTED
public static final int BOGUS_COMMENT
public static final int MARKUP_DECLARATION_OPEN
public static final int DOCTYPE
public static final int BEFORE_DOCTYPE_NAME
public static final int DOCTYPE_NAME
public static final int AFTER_DOCTYPE_NAME
public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER
public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER
public static final int BOGUS_DOCTYPE
public static final int COMMENT_START
public static final int COMMENT_START_DASH
public static final int COMMENT
public static final int COMMENT_END_DASH
public static final int COMMENT_END
public static final int COMMENT_END_BANG
public static final int NON_DATA_END_TAG_NAME
public static final int MARKUP_DECLARATION_HYPHEN
public static final int MARKUP_DECLARATION_OCTYPE
public static final int DOCTYPE_UBLIC
public static final int DOCTYPE_YSTEM
public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD
public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD
public static final int CONSUME_CHARACTER_REFERENCE
public static final int CONSUME_NCR
public static final int CHARACTER_REFERENCE_TAIL
public static final int HEX_NCR_LOOP
public static final int DECIMAL_NRC_LOOP
public static final int HANDLE_NCR_VALUE
public static final int HANDLE_NCR_VALUE_RECONSUME
public static final int CHARACTER_REFERENCE_HILO_LOOKUP
public static final int SELF_CLOSING_START_TAG
public static final int CDATA_START
public static final int CDATA_SECTION
public static final int CDATA_RSQB
public static final int CDATA_RSQB_RSQB
public static final int SCRIPT_DATA_LESS_THAN_SIGN
public static final int SCRIPT_DATA_ESCAPE_START
public static final int SCRIPT_DATA_ESCAPE_START_DASH
public static final int SCRIPT_DATA_ESCAPED_DASH
public static final int SCRIPT_DATA_ESCAPED_DASH_DASH
public static final int BOGUS_COMMENT_HYPHEN
public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN
public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START
public static final int SCRIPT_DATA_DOUBLE_ESCAPED
public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH
public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH
public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END
public static final int PROCESSING_INSTRUCTION
public static final int PROCESSING_INSTRUCTION_QUESTION_MARK
private static final int LEAD_OFFSET
private static final char[] LT_GT
private static final char[] LT_SOLIDUS
private static final char[] RSQB_RSQB
private static final char[] REPLACEMENT_CHARACTER
private static final char[] SPACE
private static final char[] LF
private static final int BUFFER_GROW_BY
private static final char[] CDATA_LSQB
"CDATA[" as char[]
private static final char[] OCTYPE
"octype" as char[]
private static final char[] UBLIC
"ublic" as char[]
private static final char[] YSTEM
"ystem" as char[]
private static final char[] TITLE_ARR
private static final char[] SCRIPT_ARR
private static final char[] STYLE_ARR
private static final char[] PLAINTEXT_ARR
private static final char[] XMP_ARR
private static final char[] TEXTAREA_ARR
private static final char[] IFRAME_ARR
private static final char[] NOEMBED_ARR
private static final char[] NOSCRIPT_ARR
private static final char[] NOFRAMES_ARR
protected final TokenHandler tokenHandler
protected EncodingDeclarationHandler encodingDeclarationHandler
protected org.xml.sax.ErrorHandler errorHandler
protected boolean lastCR
protected int stateSave
private int returnStateSave
protected int index
private boolean forceQuirks
private char additional
private int entCol
private int firstCharKey
private int lo
private int hi
private int candidate
private int strBufMark
private int prevValue
protected int value
private boolean seenDigits
protected int cstart
private java.lang.String publicId
private java.lang.String systemId
private char[] strBuf
private int strBufLen
Number of significant `char`s in `strBuf`.
private char[] longStrBuf
private int longStrBufLen
Number of significant `char`s in `longStrBuf`.
private final char[] bmpChar
private final char[] astralChar
protected ElementName endTagExpectation
private char[] endTagExpectationAsArray
protected boolean endTag
true if tokenizing an end tag
private ElementName tagName
protected AttributeName attributeName
private boolean wantsComments
protected boolean html4
true when HTML4-specific additional errors are requested.
private boolean metaBoundaryPassed
private java.lang.String doctypeName
private java.lang.String publicIdentifier
private java.lang.String systemIdentifier
private HtmlAttributes attributes
private XmlViolationPolicy contentSpacePolicy
private XmlViolationPolicy commentPolicy
private XmlViolationPolicy xmlnsPolicy
private XmlViolationPolicy namePolicy
private boolean html4ModeCompatibleWithXhtml1Schemata
private final boolean newAttributesEachTime
private int mappingLangToXmlLang
private boolean shouldSuspend
protected boolean confident
private int line
private Interner interner
protected LocatorImpl ampersandLocation
public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime)
public Tokenizer(TokenHandler tokenHandler)
tokenHandler - the handler for receiving tokens
public void setInterner(Interner interner)
public void initLocation(java.lang.String newPublicId, java.lang.String newSystemId)
public boolean isMappingLangToXmlLang()
public void setMappingLangToXmlLang(boolean mappingLangToXmlLang)
mappingLangToXmlLang - the mappingLangToXmlLang to set
public void setErrorHandler(org.xml.sax.ErrorHandler eh)
XMLReader.setErrorHandler(org.xml.sax.ErrorHandler)
public org.xml.sax.ErrorHandler getErrorHandler()
public void setCommentPolicy(XmlViolationPolicy commentPolicy)
commentPolicy - the commentPolicy to set
public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
contentNonXmlCharPolicy - the contentNonXmlCharPolicy to set
public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
contentSpacePolicy - the contentSpacePolicy to set
public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
xmlnsPolicy - the xmlnsPolicy to set
public void setNamePolicy(XmlViolationPolicy namePolicy)
public void setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
html4ModeCompatibleWithXhtml1Schemata - the html4ModeCompatibleWithXhtml1Schemata to set
public void setStateAndEndTagExpectation(int specialTokenizerState, java.lang.String endTagExpectation)
specialTokenizerState - the tokenizer state to set
endTagExpectation - the expected end tag for transitioning back to normal
public void setStateAndEndTagExpectation(int specialTokenizerState, ElementName endTagExpectation)
specialTokenizerState - the tokenizer state to set
endTagExpectation - the expected end tag for transitioning back to normal
private void endTagExpectationToArray()
public void setLineNumber(int line)
public int getLineNumber()
getLineNumber
in interface org.xml.sax.Locator
Locator.getLineNumber()
public int getColumnNumber()
getColumnNumber
in interface org.xml.sax.Locator
Locator.getColumnNumber()
public java.lang.String getPublicId()
getPublicId
in interface org.xml.sax.Locator
Locator.getPublicId()
public java.lang.String getSystemId()
getSystemId
in interface org.xml.sax.Locator
Locator.getSystemId()
public void notifyAboutMetaBoundary()
void turnOnAdditionalHtml4Errors()
HtmlAttributes emptyAttributes()
private void clearStrBufAndAppend(char c)
private void clearStrBuf()
private void appendStrBuf(char c)
c - the UTF-16 code unit to append
protected java.lang.String strBufToString()
C++ memory note: The return value must be released.
private void strBufToDoctypeName()
private void emitStrBuf() throws org.xml.sax.SAXException
org.xml.sax.SAXException - if the token handler threw
private void clearLongStrBuf()
private void clearLongStrBufAndAppend(char c)
private void appendLongStrBuf(char c)
c - the UTF-16 code unit to append
private void appendSecondHyphenToBogusComment() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void maybeAppendSpaceToBogusComment() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void appendLongStrBuf(char[] buffer, int offset, int length)
private void appendStrBufToLongStrBuf()
private java.lang.String longStrBufToString()
C++ memory note: The return value must be released.
private void emitComment(int provisionalHyphens, int pos) throws org.xml.sax.SAXException
pos - TODO
org.xml.sax.SAXException
protected void flushChars(char[] buf, int pos) throws org.xml.sax.SAXException
buf - TODO
pos - TODO
org.xml.sax.SAXException
public void fatal(java.lang.String message) throws org.xml.sax.SAXException
message - the message
org.xml.sax.SAXException
org.xml.sax.SAXParseException
public void err(java.lang.String message) throws org.xml.sax.SAXException
message - the message
org.xml.sax.SAXException
public void errTreeBuilder(java.lang.String message) throws org.xml.sax.SAXException
org.xml.sax.SAXException
public void warn(java.lang.String message) throws org.xml.sax.SAXException
message - the message
org.xml.sax.SAXException
private void resetAttributes()
private void strBufToElementNameString()
private int emitCurrentTagToken(boolean selfClosing, int pos) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void attributeNameComplete() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void addAttributeWithoutValue() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void addAttributeWithValue() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private static java.lang.String newAsciiLowerCaseStringFromString(java.lang.String str)
protected void startErrorReporting() throws org.xml.sax.SAXException
org.xml.sax.SAXException
public void start() throws org.xml.sax.SAXException
org.xml.sax.SAXException
public boolean tokenizeBuffer(UTF16Buffer buffer) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private int stateLoop(int state, char c, int pos, char[] buf, boolean reconsume, int returnState, int endPos) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private long workAroundHotSpotHugeMethodLimit(int state, char c, int pos, char[] buf, boolean reconsume, int returnState, int endPos) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected int transition(int from, int to, boolean reconsume, int pos) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void initDoctypeFields()
private void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void adjustDoubleHyphenAndAppendToLongStrBufLineFeed() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void appendLongStrBufLineFeed()
private void appendLongStrBufCarriageReturn()
protected void silentCarriageReturn()
protected void silentLineFeed()
private void emitCarriageReturn(char[] buf, int pos) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void emitReplacementCharacter(char[] buf, int pos) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void emitPlaintextReplacementCharacter(char[] buf, int pos) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void setAdditionalAndRememberAmpersandLocation(char add)
private void bogusDoctype() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void bogusDoctypeWithoutQuirks() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void emitOrAppendStrBuf(int returnState) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void handleNcrValue(int returnState) throws org.xml.sax.SAXException
org.xml.sax.SAXException
public void eof() throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void emitDoctypeToken(int pos) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected char checkChar(char[] buf, int pos) throws org.xml.sax.SAXException
org.xml.sax.SAXException
public boolean isAlreadyComplainedAboutNonAscii()
public boolean internalEncodingDeclaration(java.lang.String internalCharset) throws org.xml.sax.SAXException
org.xml.sax.SAXException
private void emitOrAppendTwo(char[] val, int returnState) throws org.xml.sax.SAXException
val - (no description provided)
org.xml.sax.SAXException
private void emitOrAppendOne(char[] val, int returnState) throws org.xml.sax.SAXException
org.xml.sax.SAXException
public void end() throws org.xml.sax.SAXException
org.xml.sax.SAXException
public void requestSuspension()
public void becomeConfident()
public boolean isNextCharOnNewLine()
public boolean isPrevCR()
public int getLine()
public int getCol()
public boolean isInDataState()
public void resetToDataState()
public void loadState(Tokenizer other) throws org.xml.sax.SAXException
org.xml.sax.SAXException
public void initializeWithoutStarting() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errGarbageAfterLtSlash() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errLtSlashGt() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errWarnLtSlashInRcdata() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errHtml4LtSlashInRcdata(char folded) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errCharRefLacksSemicolon() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNoDigitsInNCR() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errGtInSystemId() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errGtInPublicId() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNamelessDoctype() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errConsecutiveHyphens() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errPrematureEndOfComment() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errBogusComment() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errUnquotedAttributeValOrNull(char c) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errSlashNotFollowedByGt() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errHtml4XmlVoidSyntax() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNoSpaceBetweenAttributes() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errHtml4NonNameInUnquotedAttribute(char c) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errAttributeValueMissing() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errBadCharBeforeAttributeNameOrNull(char c) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEqualsSignBeforeAttributeName() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errBadCharAfterLt(char c) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errLtGt() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errProcessingInstruction() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errUnescapedAmpersandInterpretedAsCharacterReference() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNotSemicolonTerminated() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNoNamedCharacterMatch() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errQuoteBeforeAttributeName(char c) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errQuoteOrLtInAttributeNameOrNull(char c) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errExpectedPublicId() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errBogusDoctype() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void maybeWarnPrivateUseAstral() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void maybeWarnPrivateUse(char ch) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void maybeErrSlashInEndTag(boolean selfClosing) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected char errNcrNonCharacter(char ch) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errAstralNonCharacter(int ch) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNcrSurrogate() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected char errNcrControlChar(char ch) throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNcrCr() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNcrInC1Range() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofInPublicId() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofInComment() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofInDoctype() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofInAttributeValue() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofInAttributeName() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofWithoutGt() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofInTagName() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofInEndTag() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofAfterLt() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNcrOutOfRange() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNcrUnassigned() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errDuplicateAttribute() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errEofInSystemId() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errExpectedSystemId() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errMissingSpaceBeforeDoctypeName() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errHyphenHyphenBang() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNcrControlChar() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNcrZero() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNoSpaceBetweenPublicAndSystemIds() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void noteAttributeWithoutValue() throws org.xml.sax.SAXException
org.xml.sax.SAXException
protected void noteUnquotedAttributeValue() throws org.xml.sax.SAXException
org.xml.sax.SAXException
public void setEncodingDeclarationHandler(EncodingDeclarationHandler encodingDeclarationHandler)
encodingDeclarationHandler - the encodingDeclarationHandler to set
void destructor()
public void setTransitionBaseOffset(int offset)
Sets an offset to be added to the position reported to TransitionHandler.
offset - the offset