Class MetaScanner

  • Direct Known Subclasses:
    MetaSniffer

    public abstract class MetaScanner
    extends java.lang.Object
    • Field Detail

      • CHARSET

        private static final char[] CHARSET
        Constant for "charset".
      • CONTENT

        private static final char[] CONTENT
        Constant for "content".
      • HTTP_EQUIV

        private static final char[] HTTP_EQUIV
        Constant for "http-equiv".
      • CONTENT_TYPE

        private static final char[] CONTENT_TYPE
        Constant for "content-type".
      • BEFORE_ATTRIBUTE_VALUE

        private static final int BEFORE_ATTRIBUTE_VALUE
        See Also:
        Constant Field Values
      • ATTRIBUTE_VALUE_DOUBLE_QUOTED

        private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED
        See Also:
        Constant Field Values
      • ATTRIBUTE_VALUE_SINGLE_QUOTED

        private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED
        See Also:
        Constant Field Values
      • ATTRIBUTE_VALUE_UNQUOTED

        private static final int ATTRIBUTE_VALUE_UNQUOTED
        See Also:
        Constant Field Values
      • AFTER_ATTRIBUTE_VALUE_QUOTED

        private static final int AFTER_ATTRIBUTE_VALUE_QUOTED
        See Also:
        Constant Field Values
      • MARKUP_DECLARATION_OPEN

        private static final int MARKUP_DECLARATION_OPEN
        See Also:
        Constant Field Values
      • MARKUP_DECLARATION_HYPHEN

        private static final int MARKUP_DECLARATION_HYPHEN
        See Also:
        Constant Field Values
      • SELF_CLOSING_START_TAG

        private static final int SELF_CLOSING_START_TAG
        See Also:
        Constant Field Values
      • HTTP_EQUIV_CONTENT_TYPE

        private static final int HTTP_EQUIV_CONTENT_TYPE
        See Also:
        Constant Field Values
      • readable

        protected ByteReadable readable
        The data source.
      • metaState

        private int metaState
        The state of the state machine that recognizes the tag name "meta".
      • contentIndex

        private int contentIndex
        The current position in recognizing the attribute name "content".
      • charsetIndex

        private int charsetIndex
        The current position in recognizing the attribute name "charset".
      • httpEquivIndex

        private int httpEquivIndex
        The current position in recognizing the attribute name "http-equiv".
      • contentTypeIndex

        private int contentTypeIndex
        The current position in recognizing the attribute value "content-type".
      • stateSave

        protected int stateSave
        The tokenizer state.
      • strBufLen

        private int strBufLen
        The currently filled length of strBuf.
      • strBuf

        private char[] strBuf
        Accumulation buffer for attribute values.
      • content

        private java.lang.String content
      • charset

        private java.lang.String charset
      • httpEquivState

        private int httpEquivState
    • Constructor Detail

      • MetaScanner

        public MetaScanner()
    • Method Detail

      • destructor

        private void destructor()
      • read

        protected int read()
                    throws java.io.IOException
        Reads a byte from the data source. -1 means end.
        Returns:
        the byte read, or -1 if the end of the data source has been reached
        Throws:
        java.io.IOException
      • stateLoop

        protected final void stateLoop​(int state)
                                throws org.xml.sax.SAXException,
                                       java.io.IOException
        Runs the meta scanning algorithm.
        Throws:
        org.xml.sax.SAXException
        java.io.IOException
      • handleCharInAttributeValue

        private void handleCharInAttributeValue​(int c)
      • toAsciiLowerCase

        private int toAsciiLowerCase​(int c)
      • addToBuffer

        private void addToBuffer​(int c)
        Adds a character to the accumulation buffer.
        Parameters:
        c - the character to add
      • handleAttributeValue

        private void handleAttributeValue()
                                   throws org.xml.sax.SAXException
        Attempts to extract a charset name from the accumulation buffer.
        Throws:
        org.xml.sax.SAXException
      • handleTag

        private boolean handleTag()
                           throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • handleTagInner

        private boolean handleTagInner()
                                throws org.xml.sax.SAXException
        Throws:
        org.xml.sax.SAXException
      • tryCharset

        protected abstract boolean tryCharset​(java.lang.String encoding)
                                       throws org.xml.sax.SAXException
        Tries to switch to an encoding.
        Parameters:
        encoding - the encoding to try switching to
        Returns:
        true if successful
        Throws:
        org.xml.sax.SAXException