Class Tokenizer

  • All Implemented Interfaces:
    org.xml.sax.Locator
    Direct Known Subclasses:
    ErrorReportingTokenizer

    public class Tokenizer
    extends java.lang.Object
    implements org.xml.sax.Locator
    An implementation of the tokenization algorithm specified at http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html. This class implements the Locator interface. This is not an incidental implementation detail: Users of this class are encouraged to make use of the Locator nature. By default, the tokenizer may report data that XML 1.0 bans. The tokenizer can be configured to treat these conditions as fatal or to coerce the infoset to something that XML 1.0 allows.
    Version:
    $Id$
    • Field Detail

      • ATTRIBUTE_VALUE_DOUBLE_QUOTED

        public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED
        See Also:
        Constant Field Values
      • ATTRIBUTE_VALUE_SINGLE_QUOTED

        public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED
        See Also:
        Constant Field Values
      • ATTRIBUTE_VALUE_UNQUOTED

        public static final int ATTRIBUTE_VALUE_UNQUOTED
        See Also:
        Constant Field Values
      • AFTER_ATTRIBUTE_VALUE_QUOTED

        public static final int AFTER_ATTRIBUTE_VALUE_QUOTED
        See Also:
        Constant Field Values
      • MARKUP_DECLARATION_OPEN

        public static final int MARKUP_DECLARATION_OPEN
        See Also:
        Constant Field Values
      • BEFORE_DOCTYPE_PUBLIC_IDENTIFIER

        public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
        See Also:
        Constant Field Values
      • DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED

        public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
        See Also:
        Constant Field Values
      • DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED

        public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
        See Also:
        Constant Field Values
      • AFTER_DOCTYPE_PUBLIC_IDENTIFIER

        public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER
        See Also:
        Constant Field Values
      • BEFORE_DOCTYPE_SYSTEM_IDENTIFIER

        public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
        See Also:
        Constant Field Values
      • DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED

        public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
        See Also:
        Constant Field Values
      • DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED

        public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
        See Also:
        Constant Field Values
      • AFTER_DOCTYPE_SYSTEM_IDENTIFIER

        public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER
        See Also:
        Constant Field Values
      • MARKUP_DECLARATION_HYPHEN

        public static final int MARKUP_DECLARATION_HYPHEN
        See Also:
        Constant Field Values
      • MARKUP_DECLARATION_OCTYPE

        public static final int MARKUP_DECLARATION_OCTYPE
        See Also:
        Constant Field Values
      • AFTER_DOCTYPE_PUBLIC_KEYWORD

        public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD
        See Also:
        Constant Field Values
      • BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS

        public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
        See Also:
        Constant Field Values
      • AFTER_DOCTYPE_SYSTEM_KEYWORD

        public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD
        See Also:
        Constant Field Values
      • CONSUME_CHARACTER_REFERENCE

        public static final int CONSUME_CHARACTER_REFERENCE
        See Also:
        Constant Field Values
      • CHARACTER_REFERENCE_TAIL

        public static final int CHARACTER_REFERENCE_TAIL
        See Also:
        Constant Field Values
      • HANDLE_NCR_VALUE_RECONSUME

        public static final int HANDLE_NCR_VALUE_RECONSUME
        See Also:
        Constant Field Values
      • CHARACTER_REFERENCE_HILO_LOOKUP

        public static final int CHARACTER_REFERENCE_HILO_LOOKUP
        See Also:
        Constant Field Values
      • SCRIPT_DATA_LESS_THAN_SIGN

        public static final int SCRIPT_DATA_LESS_THAN_SIGN
        See Also:
        Constant Field Values
      • SCRIPT_DATA_ESCAPE_START

        public static final int SCRIPT_DATA_ESCAPE_START
        See Also:
        Constant Field Values
      • SCRIPT_DATA_ESCAPE_START_DASH

        public static final int SCRIPT_DATA_ESCAPE_START_DASH
        See Also:
        Constant Field Values
      • SCRIPT_DATA_ESCAPED_DASH

        public static final int SCRIPT_DATA_ESCAPED_DASH
        See Also:
        Constant Field Values
      • SCRIPT_DATA_ESCAPED_DASH_DASH

        public static final int SCRIPT_DATA_ESCAPED_DASH_DASH
        See Also:
        Constant Field Values
      • RAWTEXT_RCDATA_LESS_THAN_SIGN

        public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN
        See Also:
        Constant Field Values
      • SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN

        public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
        See Also:
        Constant Field Values
      • SCRIPT_DATA_DOUBLE_ESCAPE_START

        public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START
        See Also:
        Constant Field Values
      • SCRIPT_DATA_DOUBLE_ESCAPED

        public static final int SCRIPT_DATA_DOUBLE_ESCAPED
        See Also:
        Constant Field Values
      • SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN

        public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
        See Also:
        Constant Field Values
      • SCRIPT_DATA_DOUBLE_ESCAPED_DASH

        public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH
        See Also:
        Constant Field Values
      • SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH

        public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH
        See Also:
        Constant Field Values
      • SCRIPT_DATA_DOUBLE_ESCAPE_END

        public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END
        See Also:
        Constant Field Values
      • PROCESSING_INSTRUCTION_QUESTION_MARK

        public static final int PROCESSING_INSTRUCTION_QUESTION_MARK
        See Also:
        Constant Field Values
      • LEAD_OFFSET

        private static final int LEAD_OFFSET
        Magic value for UTF-16 operations.
        See Also:
        Constant Field Values
      • LT_GT

        private static final char[] LT_GT
        UTF-16 code unit array containing less than and greater than for emitting those characters on certain parse errors.
      • LT_SOLIDUS

        private static final char[] LT_SOLIDUS
        UTF-16 code unit array containing less than and solidus for emitting those characters on certain parse errors.
      • RSQB_RSQB

        private static final char[] RSQB_RSQB
        UTF-16 code unit array containing ]] for emitting those characters on state transitions.
      • REPLACEMENT_CHARACTER

        private static final char[] REPLACEMENT_CHARACTER
        Array version of U+FFFD.
      • SPACE

        private static final char[] SPACE
        Array version of space.
      • LF

        private static final char[] LF
        Array version of line feed.
      • BUFFER_GROW_BY

        private static final int BUFFER_GROW_BY
        Buffer growth parameter.
        See Also:
        Constant Field Values
      • CDATA_LSQB

        private static final char[] CDATA_LSQB
        "CDATA[" as char[]
      • OCTYPE

        private static final char[] OCTYPE
        "octype" as char[]
      • UBLIC

        private static final char[] UBLIC
        "ublic" as char[]
      • YSTEM

        private static final char[] YSTEM
        "ystem" as char[]
      • TITLE_ARR

        private static final char[] TITLE_ARR
      • SCRIPT_ARR

        private static final char[] SCRIPT_ARR
      • STYLE_ARR

        private static final char[] STYLE_ARR
      • PLAINTEXT_ARR

        private static final char[] PLAINTEXT_ARR
      • XMP_ARR

        private static final char[] XMP_ARR
      • TEXTAREA_ARR

        private static final char[] TEXTAREA_ARR
      • IFRAME_ARR

        private static final char[] IFRAME_ARR
      • NOEMBED_ARR

        private static final char[] NOEMBED_ARR
      • NOSCRIPT_ARR

        private static final char[] NOSCRIPT_ARR
      • NOFRAMES_ARR

        private static final char[] NOFRAMES_ARR
      • tokenHandler

        protected final TokenHandler tokenHandler
        The token handler.
      • errorHandler

        protected org.xml.sax.ErrorHandler errorHandler
        The error handler.
      • lastCR

        protected boolean lastCR
        Whether the previous char read was CR.
      • stateSave

        protected int stateSave
      • returnStateSave

        private int returnStateSave
      • index

        protected int index
      • forceQuirks

        private boolean forceQuirks
      • additional

        private char additional
      • entCol

        private int entCol
      • firstCharKey

        private int firstCharKey
      • lo

        private int lo
      • hi

        private int hi
      • candidate

        private int candidate
      • strBufMark

        private int strBufMark
      • prevValue

        private int prevValue
      • value

        protected int value
      • seenDigits

        private boolean seenDigits
      • cstart

        protected int cstart
      • publicId

        private java.lang.String publicId
        The SAX public id for the resource being tokenized. (Only passed back as part of locator data.)
      • systemId

        private java.lang.String systemId
        The SAX system id for the resource being tokenized. (Only passed back as part of locator data.)
      • strBuf

        private char[] strBuf
        Buffer for short identifiers.
      • strBufLen

        private int strBufLen
        Number of significant chars in strBuf.
      • longStrBuf

        private char[] longStrBuf
        Buffer for long strings.
      • longStrBufLen

        private int longStrBufLen
        Number of significant chars in longStrBuf.
      • bmpChar

        private final char[] bmpChar
        Buffer for expanding NCRs falling into the Basic Multilingual Plane.
      • astralChar

        private final char[] astralChar
        Buffer for expanding astral NCRs.
      • endTagExpectation

        protected ElementName endTagExpectation
        The element whose end tag closes the current CDATA or RCDATA element.
      • endTagExpectationAsArray

        private char[] endTagExpectationAsArray
      • endTag

        protected boolean endTag
        true if tokenizing an end tag
      • tagName

        private ElementName tagName
        The current tag token name.
      • attributeName

        protected AttributeName attributeName
        The current attribute name.
      • wantsComments

        private boolean wantsComments
        Whether comment tokens are emitted.
      • html4

        protected boolean html4
        true when HTML4-specific additional errors are requested.
      • metaBoundaryPassed

        private boolean metaBoundaryPassed
        Whether the stream is past the first 512 bytes.
      • doctypeName

        private java.lang.String doctypeName
        The name of the current doctype token.
      • publicIdentifier

        private java.lang.String publicIdentifier
        The public id of the current doctype token.
      • systemIdentifier

        private java.lang.String systemIdentifier
        The system id of the current doctype token.
      • contentSpacePolicy

        private XmlViolationPolicy contentSpacePolicy
        The policy for vertical tab and form feed.
      • html4ModeCompatibleWithXhtml1Schemata

        private boolean html4ModeCompatibleWithXhtml1Schemata
      • newAttributesEachTime

        private final boolean newAttributesEachTime
      • mappingLangToXmlLang

        private int mappingLangToXmlLang
      • shouldSuspend

        private boolean shouldSuspend
      • confident

        protected boolean confident
      • line

        private int line
      • ampersandLocation

        protected LocatorImpl ampersandLocation
    • Constructor Detail

      • Tokenizer

        public Tokenizer​(TokenHandler tokenHandler,
                         boolean newAttributesEachTime)
      • Tokenizer

        public Tokenizer​(TokenHandler tokenHandler)
        The constructor.
        Parameters:
        tokenHandler - the handler for receiving tokens
    • Method Detail

      • setInterner

        public void setInterner​(Interner interner)
      • initLocation

        public void initLocation​(java.lang.String newPublicId,
                                 java.lang.String newSystemId)
      • isMappingLangToXmlLang

        public boolean isMappingLangToXmlLang()
        Returns the mappingLangToXmlLang.
        Returns:
        the mappingLangToXmlLang
      • setMappingLangToXmlLang

        public void setMappingLangToXmlLang​(boolean mappingLangToXmlLang)
        Sets the mappingLangToXmlLang.
        Parameters:
        mappingLangToXmlLang - the mappingLangToXmlLang to set
      • setErrorHandler

        public void setErrorHandler​(org.xml.sax.ErrorHandler eh)
        Sets the error handler.
        See Also:
        XMLReader.setErrorHandler(org.xml.sax.ErrorHandler)
      • getErrorHandler

        public org.xml.sax.ErrorHandler getErrorHandler()
      • setCommentPolicy

        public void setCommentPolicy​(XmlViolationPolicy commentPolicy)
        Sets the commentPolicy.
        Parameters:
        commentPolicy - the commentPolicy to set
      • setContentNonXmlCharPolicy

        public void setContentNonXmlCharPolicy​(XmlViolationPolicy contentNonXmlCharPolicy)
        Sets the contentNonXmlCharPolicy.
        Parameters:
        contentNonXmlCharPolicy - the contentNonXmlCharPolicy to set
      • setContentSpacePolicy

        public void setContentSpacePolicy​(XmlViolationPolicy contentSpacePolicy)
        Sets the contentSpacePolicy.
        Parameters:
        contentSpacePolicy - the contentSpacePolicy to set
      • setXmlnsPolicy

        public void setXmlnsPolicy​(XmlViolationPolicy xmlnsPolicy)
        Sets the xmlnsPolicy.
        Parameters:
        xmlnsPolicy - the xmlnsPolicy to set
      • setHtml4ModeCompatibleWithXhtml1Schemata

        public void setHtml4ModeCompatibleWithXhtml1Schemata​(boolean html4ModeCompatibleWithXhtml1Schemata)
        Sets the html4ModeCompatibleWithXhtml1Schemata.
        Parameters:
        html4ModeCompatibleWithXhtml1Schemata - the html4ModeCompatibleWithXhtml1Schemata to set
      • setStateAndEndTagExpectation

        public void setStateAndEndTagExpectation​(int specialTokenizerState,
                                                 java.lang.String endTagExpectation)
        Sets the tokenizer state and the associated element name. This should only ever be used to put the tokenizer into one of the states that have a special end tag expectation.
        Parameters:
        specialTokenizerState - the tokenizer state to set
        endTagExpectation - the expected end tag for transitioning back to normal
      • setStateAndEndTagExpectation

        public void setStateAndEndTagExpectation​(int specialTokenizerState,
                                                 ElementName endTagExpectation)
        Sets the tokenizer state and the associated element name. This should only ever be used to put the tokenizer into one of the states that have a special end tag expectation.
        Parameters:
        specialTokenizerState - the tokenizer state to set
        endTagExpectation - the expected end tag for transitioning back to normal
      • endTagExpectationToArray

        private void endTagExpectationToArray()
      • setLineNumber

        public void setLineNumber​(int line)
        For C++ use only.
      • getLineNumber

        public int getLineNumber()
        Specified by:
        getLineNumber in interface org.xml.sax.Locator
        See Also:
        Locator.getLineNumber()
      • getColumnNumber

        public int getColumnNumber()
        Specified by:
        getColumnNumber in interface org.xml.sax.Locator
        See Also:
        Locator.getColumnNumber()
      • getPublicId

        public java.lang.String getPublicId()
        Specified by:
        getPublicId in interface org.xml.sax.Locator
        See Also:
        Locator.getPublicId()
      • getSystemId

        public java.lang.String getSystemId()
        Specified by:
        getSystemId in interface org.xml.sax.Locator
        See Also:
        Locator.getSystemId()
      • notifyAboutMetaBoundary

        public void notifyAboutMetaBoundary()
      • turnOnAdditionalHtml4Errors

        void turnOnAdditionalHtml4Errors()
      • clearStrBufAndAppend

        private void clearStrBufAndAppend​(char c)
      • clearStrBuf

        private void clearStrBuf()
      • appendStrBuf

        private void appendStrBuf​(char c)
        Appends to the smaller buffer.
        Parameters:
        c - the UTF-16 code unit to append
      • strBufToString

        protected java.lang.String strBufToString()
        The smaller buffer as a String. Currently only used for error reporting.

        C++ memory note: The return value must be released.

        Returns:
        the smaller buffer as a string
      • strBufToDoctypeName

        private void strBufToDoctypeName()
        Returns the short buffer as a local name. The return value is released in emitDoctypeToken().
      • emitStrBuf

        private void emitStrBuf()
                         throws org.xml.sax.SAXException
        Emits the smaller buffer as character tokens.
        Throws:
        org.xml.sax.SAXException - if the token handler threw
      • clearLongStrBuf

        private void clearLongStrBuf()
      • clearLongStrBufAndAppend

        private void clearLongStrBufAndAppend​(char c)
      • appendLongStrBuf

        private void appendLongStrBuf​(char c)
        Appends to the larger buffer.
        Parameters:
        c - the UTF-16 code unit to append
      • appendSecondHyphenToBogusComment

        private void appendSecondHyphenToBogusComment()
                                               throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • maybeAppendSpaceToBogusComment

        private void maybeAppendSpaceToBogusComment()
                                             throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • adjustDoubleHyphenAndAppendToLongStrBufAndErr

        private void adjustDoubleHyphenAndAppendToLongStrBufAndErr​(char c)
                                                            throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • appendLongStrBuf

        private void appendLongStrBuf​(char[] buffer,
                                      int offset,
                                      int length)
      • appendStrBufToLongStrBuf

        private void appendStrBufToLongStrBuf()
        Append the contents of the smaller buffer to the larger one.
      • longStrBufToString

        private java.lang.String longStrBufToString()
        The larger buffer as a string.

        C++ memory note: The return value must be released.

        Returns:
        the larger buffer as a string
      • emitComment

        private void emitComment​(int provisionalHyphens,
                                 int pos)
                          throws org.xml.sax.SAXException
        Emits the current comment token.
        Parameters:
        pos - TODO
        Throws:
        org.xml.sax.SAXException
      • flushChars

        protected void flushChars​(char[] buf,
                                  int pos)
                           throws org.xml.sax.SAXException
        Flushes coalesced character tokens.
        Parameters:
        buf - TODO
        pos - TODO
        Throws:
        org.xml.sax.SAXException
      • fatal

        public void fatal​(java.lang.String message)
                   throws org.xml.sax.SAXException
        Reports a condition that would make the infoset incompatible with XML 1.0 as fatal.
        Parameters:
        message - the message
        Throws:
        org.xml.sax.SAXException
        org.xml.sax.SAXParseException
      • err

        public void err​(java.lang.String message)
                 throws org.xml.sax.SAXException
        Reports a Parse Error.
        Parameters:
        message - the message
        Throws:
        org.xml.sax.SAXException
      • errTreeBuilder

        public void errTreeBuilder​(java.lang.String message)
                            throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • warn

        public void warn​(java.lang.String message)
                  throws org.xml.sax.SAXException
        Reports a warning.
        Parameters:
        message - the message
        Throws:
        org.xml.sax.SAXException
      • resetAttributes

        private void resetAttributes()
      • strBufToElementNameString

        private void strBufToElementNameString()
      • emitCurrentTagToken

        private int emitCurrentTagToken​(boolean selfClosing,
                                        int pos)
                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • attributeNameComplete

        private void attributeNameComplete()
                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • addAttributeWithoutValue

        private void addAttributeWithoutValue()
                                       throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • addAttributeWithValue

        private void addAttributeWithValue()
                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • newAsciiLowerCaseStringFromString

        private static java.lang.String newAsciiLowerCaseStringFromString​(java.lang.String str)
      • startErrorReporting

        protected void startErrorReporting()
                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • start

        public void start()
                   throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • tokenizeBuffer

        public boolean tokenizeBuffer​(UTF16Buffer buffer)
                               throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • stateLoop

        private int stateLoop​(int state,
                              char c,
                              int pos,
                              char[] buf,
                              boolean reconsume,
                              int returnState,
                              int endPos)
                       throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • workAroundHotSpotHugeMethodLimit

        private long workAroundHotSpotHugeMethodLimit​(int state,
                                                      char c,
                                                      int pos,
                                                      char[] buf,
                                                      boolean reconsume,
                                                      int returnState,
                                                      int endPos)
                                               throws org.xml.sax.SAXException
        Compressed returnValue: int returnState = returnValue >> 33; boolean breakOuterState = ((returnValue >> 32) & 0x1) != 0; int pos = returnValue & 0xFFFFFFFF (same as (int) returnValue).
        Throws:
        org.xml.sax.SAXException
      • transition

        protected int transition​(int from,
                                 int to,
                                 boolean reconsume,
                                 int pos)
                          throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • initDoctypeFields

        private void initDoctypeFields()
      • adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn

        private void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
                                                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • adjustDoubleHyphenAndAppendToLongStrBufLineFeed

        private void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
                                                              throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • appendLongStrBufLineFeed

        private void appendLongStrBufLineFeed()
      • appendLongStrBufCarriageReturn

        private void appendLongStrBufCarriageReturn()
      • silentCarriageReturn

        protected void silentCarriageReturn()
      • silentLineFeed

        protected void silentLineFeed()
      • emitCarriageReturn

        private void emitCarriageReturn​(char[] buf,
                                        int pos)
                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • emitReplacementCharacter

        private void emitReplacementCharacter​(char[] buf,
                                              int pos)
                                       throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • emitPlaintextReplacementCharacter

        private void emitPlaintextReplacementCharacter​(char[] buf,
                                                       int pos)
                                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • setAdditionalAndRememberAmpersandLocation

        private void setAdditionalAndRememberAmpersandLocation​(char add)
      • bogusDoctype

        private void bogusDoctype()
                           throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • bogusDoctypeWithoutQuirks

        private void bogusDoctypeWithoutQuirks()
                                        throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • emitOrAppendStrBuf

        private void emitOrAppendStrBuf​(int returnState)
                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • handleNcrValue

        private void handleNcrValue​(int returnState)
                             throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • eof

        public void eof()
                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • emitDoctypeToken

        private void emitDoctypeToken​(int pos)
                               throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • checkChar

        protected char checkChar​(char[] buf,
                                 int pos)
                          throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • isAlreadyComplainedAboutNonAscii

        public boolean isAlreadyComplainedAboutNonAscii()
        Returns the alreadyComplainedAboutNonAscii.
        Returns:
        the alreadyComplainedAboutNonAscii
      • internalEncodingDeclaration

        public boolean internalEncodingDeclaration​(java.lang.String internalCharset)
                                            throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • emitOrAppendTwo

        private void emitOrAppendTwo​(char[] val,
                                     int returnState)
                              throws org.xml.sax.SAXException
        Parameters:
        val -
        Throws:
        org.xml.sax.SAXException
      • emitOrAppendOne

        private void emitOrAppendOne​(char[] val,
                                     int returnState)
                              throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • end

        public void end()
                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • requestSuspension

        public void requestSuspension()
      • becomeConfident

        public void becomeConfident()
      • isNextCharOnNewLine

        public boolean isNextCharOnNewLine()
        Returns the nextCharOnNewLine.
        Returns:
        the nextCharOnNewLine
      • isPrevCR

        public boolean isPrevCR()
      • getLine

        public int getLine()
        Returns the line.
        Returns:
        the line
      • getCol

        public int getCol()
        Returns the col.
        Returns:
        the col
      • isInDataState

        public boolean isInDataState()
      • resetToDataState

        public void resetToDataState()
      • loadState

        public void loadState​(Tokenizer other)
                       throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • initializeWithoutStarting

        public void initializeWithoutStarting()
                                       throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errGarbageAfterLtSlash

        protected void errGarbageAfterLtSlash()
                                       throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errLtSlashGt

        protected void errLtSlashGt()
                             throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errWarnLtSlashInRcdata

        protected void errWarnLtSlashInRcdata()
                                       throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errHtml4LtSlashInRcdata

        protected void errHtml4LtSlashInRcdata​(char folded)
                                        throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errCharRefLacksSemicolon

        protected void errCharRefLacksSemicolon()
                                         throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNoDigitsInNCR

        protected void errNoDigitsInNCR()
                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errGtInSystemId

        protected void errGtInSystemId()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errGtInPublicId

        protected void errGtInPublicId()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNamelessDoctype

        protected void errNamelessDoctype()
                                   throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errConsecutiveHyphens

        protected void errConsecutiveHyphens()
                                      throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errPrematureEndOfComment

        protected void errPrematureEndOfComment()
                                         throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errBogusComment

        protected void errBogusComment()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errUnquotedAttributeValOrNull

        protected void errUnquotedAttributeValOrNull​(char c)
                                              throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errSlashNotFollowedByGt

        protected void errSlashNotFollowedByGt()
                                        throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errHtml4XmlVoidSyntax

        protected void errHtml4XmlVoidSyntax()
                                      throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNoSpaceBetweenAttributes

        protected void errNoSpaceBetweenAttributes()
                                            throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errHtml4NonNameInUnquotedAttribute

        protected void errHtml4NonNameInUnquotedAttribute(char c)
                                                   throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errLtOrEqualsOrGraveInUnquotedAttributeOrNull

        protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
                                                              throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errAttributeValueMissing

        protected void errAttributeValueMissing()
                                         throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errBadCharBeforeAttributeNameOrNull

        protected void errBadCharBeforeAttributeNameOrNull(char c)
                                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEqualsSignBeforeAttributeName

        protected void errEqualsSignBeforeAttributeName()
                                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errBadCharAfterLt

        protected void errBadCharAfterLt(char c)
                                  throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errLtGt

        protected void errLtGt()
                        throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errProcessingInstruction

        protected void errProcessingInstruction()
                                         throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errUnescapedAmpersandInterpretedAsCharacterReference

        protected void errUnescapedAmpersandInterpretedAsCharacterReference()
                                                                     throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNotSemicolonTerminated

        protected void errNotSemicolonTerminated()
                                          throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNoNamedCharacterMatch

        protected void errNoNamedCharacterMatch()
                                         throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errQuoteBeforeAttributeName

        protected void errQuoteBeforeAttributeName(char c)
                                            throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errQuoteOrLtInAttributeNameOrNull

        protected void errQuoteOrLtInAttributeNameOrNull(char c)
                                                  throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errExpectedPublicId

        protected void errExpectedPublicId()
                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errBogusDoctype

        protected void errBogusDoctype()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • maybeWarnPrivateUseAstral

        protected void maybeWarnPrivateUseAstral()
                                          throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • maybeWarnPrivateUse

        protected void maybeWarnPrivateUse(char ch)
                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • maybeErrAttributesOnEndTag

        protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
                                           throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • maybeErrSlashInEndTag

        protected void maybeErrSlashInEndTag(boolean selfClosing)
                                      throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNcrNonCharacter

        protected char errNcrNonCharacter(char ch)
                                   throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errAstralNonCharacter

        protected void errAstralNonCharacter(int ch)
                                      throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNcrSurrogate

        protected void errNcrSurrogate()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNcrControlChar

        protected char errNcrControlChar(char ch)
                                  throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNcrCr

        protected void errNcrCr()
                         throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNcrInC1Range

        protected void errNcrInC1Range()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofInPublicId

        protected void errEofInPublicId()
                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofInComment

        protected void errEofInComment()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofInDoctype

        protected void errEofInDoctype()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofInAttributeValue

        protected void errEofInAttributeValue()
                                       throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofInAttributeName

        protected void errEofInAttributeName()
                                      throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofWithoutGt

        protected void errEofWithoutGt()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofInTagName

        protected void errEofInTagName()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofInEndTag

        protected void errEofInEndTag()
                               throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofAfterLt

        protected void errEofAfterLt()
                              throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNcrOutOfRange

        protected void errNcrOutOfRange()
                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNcrUnassigned

        protected void errNcrUnassigned()
                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errDuplicateAttribute

        protected void errDuplicateAttribute()
                                      throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errEofInSystemId

        protected void errEofInSystemId()
                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errExpectedSystemId

        protected void errExpectedSystemId()
                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errMissingSpaceBeforeDoctypeName

        protected void errMissingSpaceBeforeDoctypeName()
                                                 throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errHyphenHyphenBang

        protected void errHyphenHyphenBang()
                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNcrControlChar

        protected void errNcrControlChar()
                                  throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNcrZero

        protected void errNcrZero()
                           throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNoSpaceBetweenDoctypeSystemKeywordAndQuote

        protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
                                                              throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNoSpaceBetweenPublicAndSystemIds

        protected void errNoSpaceBetweenPublicAndSystemIds()
                                                    throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • errNoSpaceBetweenDoctypePublicKeywordAndQuote

        protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
                                                              throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • noteAttributeWithoutValue

        protected void noteAttributeWithoutValue()
                                          throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • noteUnquotedAttributeValue

        protected void noteUnquotedAttributeValue()
                                           throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • setEncodingDeclarationHandler

        public void setEncodingDeclarationHandler(EncodingDeclarationHandler encodingDeclarationHandler)
        Sets the encoding declaration handler.
        Parameters:
        encodingDeclarationHandler - the encoding declaration handler to set
      • destructor

        void destructor()
      • setTransitionBaseOffset

        public void setTransitionBaseOffset(int offset)
        Sets an offset to be added to the position reported to the TransitionHandler.
        Parameters:
        offset - the offset