Package org.jcodings

Class Encoding

java.lang.Object
org.jcodings.Encoding
All Implemented Interfaces:
Cloneable
Direct Known Subclasses:
AbstractEncoding

public abstract class Encoding extends Object implements Cloneable
  • Field Summary

    Fields
    Modifier and Type
    Field
    Description
    static final int
     
    private Charset
     
    private static int
     
    private int
     
    private int
     
    private boolean
     
    private boolean
     
    private final boolean
     
    private final boolean
     
    protected boolean
     
    protected boolean
     
    protected final int
     
    protected final int
     
    private byte[]
     
    static final byte
     
    private String
     
  • Constructor Summary

    Constructors
    Modifier
    Constructor
    Description
    protected
    Encoding(String name, int minLength, int maxLength)
     
  • Method Summary

    Modifier and Type
    Method
    Description
    abstract void
    Expand case folds given a character class (used for case insensitive matching)
    static byte
    asciiToLower(int c)
     
    static byte
    asciiToUpper(int c)
     
    abstract CaseFoldCodeItem[]
    caseFoldCodesByString(int flag, byte[] bytes, int p, int end)
    Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString) Oniguruma equivalent: get_case_fold_codes_by_str
    abstract int
    caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd)
    Oniguruma equivalent: case_map
    abstract int
    codeToMbc(int code, byte[] bytes, int p)
    Extracts code point into its multibyte representation
    abstract int
    codeToMbcLength(int code)
    Returns character length given a code point Oniguruma equivalent: code_to_mbclen
    abstract int[]
    ctypeCodeRange(int ctype, IntHolder sbOut)
    Returns code range for a given character type Oniguruma equivalent: get_ctype_code_range
    static int
    digitVal(int code)
     
    final boolean
    equals(Object other)
     
    Charset
    getCharset()
    If this encoding is capable of being represented by a Java Charset then provide it.
    String
    getCharsetName()
    The name of the equivalent Java Charset for this encoding.
    final int
     
    final byte[]
     
    final int
     
    final boolean
    isAlnum(int code)
     
    final boolean
    isAlpha(int code)
     
    static boolean
    isAscii(byte b)
     
    static boolean
    isAscii(int code)
     
    final boolean
     
    final boolean
    isBlank(int code)
     
    final boolean
    isCntrl(int code)
     
    abstract boolean
    isCodeCType(int code, int ctype)
    Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods)
    final boolean
    isDigit(int code)
     
    final boolean
     
    final boolean
     
    final boolean
    isGraph(int code)
     
    final boolean
    isLower(int code)
     
    static boolean
    isMbcAscii(byte b)
     
    boolean
    isMbcCrnl(byte[] bytes, int p, int end)
     
    final boolean
    isMbcHead(byte[] bytes, int p, int end)
     
    final boolean
    isMbcWord(byte[] bytes, int p, int end)
     
    abstract boolean
    isNewLine(byte[] bytes, int p, int end)
    Returns true if bytes[p] is a head of a new line character Oniguruma equivalent: is_mbc_newline
    final boolean
    isNewLine(int code)
     
    final boolean
    isPrint(int code)
     
    final boolean
    isPunct(int code)
     
    abstract boolean
    isReverseMatchAllowed(byte[] bytes, int p, int end)
    Returns true if it's safe to use reversal Boyer-Moore search fail fast algorithm Oniguruma equivalent: is_allowed_reverse_match
    final boolean
    isSbWord(int code)
     
    final boolean
     
    final boolean
    isSpace(int code)
     
    final boolean
     
    final boolean
    isUpper(int code)
     
    final boolean
     
    final boolean
    isWord(int code)
     
    static boolean
    isWordGraphPrint(int ctype)
     
    final boolean
    isXDigit(int code)
     
    abstract int
    leftAdjustCharHead(byte[] bytes, int p, int s, int end)
    Seeks the previous character head in a stream Oniguruma equivalent: left_adjust_char_head
    abstract int
    length(byte c)
    Returns character length given character head. Returns 1 for singlebyte encodings, or performs direct length table lookup for multibyte ones.
    abstract int
    length(byte[] bytes, int p, int end)
    Returns character length given stream, character position and stream end. Returns 1 for singlebyte encodings, or performs sanity validations for multibyte ones and returns the character length, or the missing characters in the stream otherwise.
    static Encoding
    load(String name)
     
    static Encoding
    load(String name, String pkg)
     
    final int
    Returns maximum character byte length that can appear in an encoding Oniguruma equivalent: max_enc_len
    final int
    Deprecated.
    abstract int
    mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] to)
    Performs case folding for a character at bytes[pp.value]
    final int
    Deprecated.
    abstract int
    mbcToCode(byte[] bytes, int p, int end)
    Returns code point for a character Oniguruma equivalent: mbc_to_code
    final int
    Returns minimum character byte length that can appear in an encoding Oniguruma equivalent: min_enc_len
    static int
    odigitVal(int code)
     
    final int
    prevCharHead(byte[] bytes, int p, int s, int end)
     
    abstract int
    propertyNameToCType(byte[] bytes, int p, int end)
    Returns character type given character type name (used when e.g. \p{Alpha}) Oniguruma equivalent: property_name_to_ctype
    (package private) Encoding
    replicate(byte[] name)
     
    final int
    rightAdjustCharHead(byte[] bytes, int p, int s, int end)
     
    final int
    rightAdjustCharHeadWithPrev(byte[] bytes, int p, int s, int end, IntHolder prev)
     
    protected final void
     
    protected final void
    setName(byte[] name)
     
    protected final void
     
    final int
    step(byte[] bytes, int p, int end, int n)
     
    final int
    stepBack(byte[] bytes, int p, int s, int end, int n)
     
    final int
    strByteLengthNull(byte[] bytes, int p, int end)
     
    abstract int
    strCodeAt(byte[] bytes, int p, int end, int index)
     
    abstract int
    strLength(byte[] bytes, int p, int end)
     
    final int
    strLengthNull(byte[] bytes, int p, int end)
     
    final int
    strNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n)
     
    byte[]
    Returns lower case table if it's safe to use it directly, otherwise null. Used for fast case insensitive matching for some singlebyte encodings.
    final String
     
    final int
    xdigitVal(int code)
     

    Methods inherited from class java.lang.Object

    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
  • Field Details

    • CHAR_INVALID

      public static final int CHAR_INVALID
      See Also:
    • count

      private static int count
    • minLength

      protected final int minLength
    • maxLength

      protected final int maxLength
    • isFixedWidth

      private final boolean isFixedWidth
    • isSingleByte

      private final boolean isSingleByte
    • isAsciiCompatible

      private boolean isAsciiCompatible
    • isUnicode

      protected boolean isUnicode
    • isUTF8

      protected boolean isUTF8
    • name

      private byte[] name
    • hashCode

      private int hashCode
    • index

      private int index
    • charset

      private Charset charset
    • isDummy

      private boolean isDummy
    • stringName

      private String stringName
    • NEW_LINE

      public static final byte NEW_LINE
      See Also:
  • Constructor Details

    • Encoding

      protected Encoding(String name, int minLength, int maxLength)
  • Method Details

    • setName

      protected final void setName(String name)
    • setName

      protected final void setName(byte[] name)
    • setDummy

      protected final void setDummy()
    • toString

      public final String toString()
      Overrides:
      toString in class Object
    • equals

      public final boolean equals(Object other)
      Overrides:
      equals in class Object
    • hashCode

      public final int hashCode()
      Overrides:
      hashCode in class Object
    • getIndex

      public final int getIndex()
    • getName

      public final byte[] getName()
    • isDummy

      public final boolean isDummy()
    • isAsciiCompatible

      public final boolean isAsciiCompatible()
    • isUnicode

      public final boolean isUnicode()
    • isUTF8

      public final boolean isUTF8()
    • getCharset

      public Charset getCharset()
      If this encoding is capable of being represented by a Java Charset then provide it. Otherwise this will raise a CharsetNotFound error via the JDK APIs. To reduce cases like jruby/jruby#4716, we always attempt to find a charset here, and default to using the encoding name, which is never null. Either the encoding will exist in the JDK or it will fail hard, rather than propagating a null Charset. Encodings with names different from those found in the JDK can override getCharsetName to provide that name, or override getCharset to return the right Charset.
    • getCharsetName

      public String getCharsetName()
      The name of the equivalent Java Charset for this encoding. Defaults to the name of the encoding. Subclasses can override this to provide a different name.
      Returns:
      the name of the equivalent Java Charset for this encoding
    • replicate

      Encoding replicate(byte[] name)
    • length

      public abstract int length(byte c)
      Returns character length given character head. Returns 1 for singlebyte encodings, or performs direct length table lookup for multibyte ones.
      Parameters:
      c - Character head. Oniguruma equivalent: mbc_enc_len. To be deprecated very soon (use the length(byte[] bytes, int p, int end) version).
    • length

      public abstract int length(byte[] bytes, int p, int end)
      Returns character length given stream, character position and stream end. Returns 1 for singlebyte encodings, or performs sanity validations for multibyte ones and returns the character length, or the missing characters in the stream otherwise.
      Returns:
      0: never returned.
      > 0: valid character, length returned.
      -1: illegal/malformed character.
      < -1: (-1 - n), number of missing bytes for the character in the p...end range.
      Oniguruma equivalent: mbc_enc_len, modified for 1.9 purposes.
    • maxLength

      public final int maxLength()
      Returns maximum character byte length that can appear in an encoding Oniguruma equivalent: max_enc_len
    • maxLengthDistance

      @Deprecated public final int maxLengthDistance()
      Deprecated.
    • minLength

      public final int minLength()
      Returns minimum character byte length that can appear in an encoding Oniguruma equivalent: min_enc_len
    • isNewLine

      public abstract boolean isNewLine(byte[] bytes, int p, int end)
      Returns true if bytes[p] is a head of a new line character Oniguruma equivalent: is_mbc_newline
    • mbcToCode

      public abstract int mbcToCode(byte[] bytes, int p, int end)
      Returns code point for a character Oniguruma equivalent: mbc_to_code
    • codeToMbcLength

      public abstract int codeToMbcLength(int code)
      Returns character length given a code point Oniguruma equivalent: code_to_mbclen
    • codeToMbc

      public abstract int codeToMbc(int code, byte[] bytes, int p)
      Extracts code point into its multibyte representation
      Returns:
      character length for the given code point Oniguruma equivalent: code_to_mbc
    • mbcCaseFold

      public abstract int mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] to)
      Performs case folding for a character at bytes[pp.value]
      Parameters:
      flag - case fold flag
      pp - an IntHolder that points at character head
      to - a buffer where to extract case folded character Oniguruma equivalent: mbc_case_fold
    • toLowerCaseTable

      public byte[] toLowerCaseTable()
      Returns lower case table if it's safe to use it directly, otherwise null. Used for fast case insensitive matching for some singlebyte encodings.
      Returns:
      lower case table
    • applyAllCaseFold

      public abstract void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg)
      Expand case folds given a character class (used for case insensitive matching)
      Parameters:
      flag - case fold flag
      fun - case folding functor (look at: ApplyCaseFold)
      arg - case folding functor argument (look at: ApplyCaseFoldArg) Oniguruma equivalent: apply_all_case_fold
    • caseFoldCodesByString

      public abstract CaseFoldCodeItem[] caseFoldCodesByString(int flag, byte[] bytes, int p, int end)
      Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString) Oniguruma equivalent: get_case_fold_codes_by_str
    • propertyNameToCType

      public abstract int propertyNameToCType(byte[] bytes, int p, int end)
      Returns character type given character type name (used when e.g. \p{Alpha}) Oniguruma equivalent: property_name_to_ctype
    • isCodeCType

      public abstract boolean isCodeCType(int code, int ctype)
      Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods)
      Parameters:
      code - a code point of a character
      ctype - a character type to check against Oniguruma equivalent: is_code_ctype
    • ctypeCodeRange

      public abstract int[] ctypeCodeRange(int ctype, IntHolder sbOut)
      Returns code range for a given character type Oniguruma equivalent: get_ctype_code_range
    • leftAdjustCharHead

      public abstract int leftAdjustCharHead(byte[] bytes, int p, int s, int end)
      Seeks the previous character head in a stream Oniguruma equivalent: left_adjust_char_head
      Parameters:
      bytes - byte stream
      p - position
      s - stop
      end - end
    • isReverseMatchAllowed

      public abstract boolean isReverseMatchAllowed(byte[] bytes, int p, int end)
      Returns true if it's safe to use reversal Boyer-Moore search fail fast algorithm Oniguruma equivalent: is_allowed_reverse_match
    • caseMap

      public abstract int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd)
      Oniguruma equivalent: case_map
    • rightAdjustCharHead

      public final int rightAdjustCharHead(byte[] bytes, int p, int s, int end)
    • rightAdjustCharHeadWithPrev

      public final int rightAdjustCharHeadWithPrev(byte[] bytes, int p, int s, int end, IntHolder prev)
    • prevCharHead

      public final int prevCharHead(byte[] bytes, int p, int s, int end)
    • stepBack

      public final int stepBack(byte[] bytes, int p, int s, int end, int n)
    • step

      public final int step(byte[] bytes, int p, int end, int n)
    • strLength

      public abstract int strLength(byte[] bytes, int p, int end)
    • strCodeAt

      public abstract int strCodeAt(byte[] bytes, int p, int end, int index)
    • strLengthNull

      public final int strLengthNull(byte[] bytes, int p, int end)
    • strByteLengthNull

      public final int strByteLengthNull(byte[] bytes, int p, int end)
    • strNCmp

      public final int strNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n)
    • isNewLine

      public final boolean isNewLine(int code)
    • isGraph

      public final boolean isGraph(int code)
    • isPrint

      public final boolean isPrint(int code)
    • isAlnum

      public final boolean isAlnum(int code)
    • isAlpha

      public final boolean isAlpha(int code)
    • isLower

      public final boolean isLower(int code)
    • isUpper

      public final boolean isUpper(int code)
    • isCntrl

      public final boolean isCntrl(int code)
    • isPunct

      public final boolean isPunct(int code)
    • isSpace

      public final boolean isSpace(int code)
    • isBlank

      public final boolean isBlank(int code)
    • isDigit

      public final boolean isDigit(int code)
    • isXDigit

      public final boolean isXDigit(int code)
    • isWord

      public final boolean isWord(int code)
    • isMbcWord

      public final boolean isMbcWord(byte[] bytes, int p, int end)
    • isSbWord

      public final boolean isSbWord(int code)
    • isMbcHead

      public final boolean isMbcHead(byte[] bytes, int p, int end)
    • isMbcCrnl

      public boolean isMbcCrnl(byte[] bytes, int p, int end)
    • digitVal

      public static int digitVal(int code)
    • odigitVal

      public static int odigitVal(int code)
    • xdigitVal

      public final int xdigitVal(int code)
    • isMbcAscii

      public static boolean isMbcAscii(byte b)
    • isAscii

      public static boolean isAscii(int code)
    • isAscii

      public static boolean isAscii(byte b)
    • asciiToLower

      public static byte asciiToLower(int c)
    • asciiToUpper

      public static byte asciiToUpper(int c)
    • isWordGraphPrint

      public static boolean isWordGraphPrint(int ctype)
    • mbcodeStartPosition

      @Deprecated public final int mbcodeStartPosition()
      Deprecated.
    • isSingleByte

      public final boolean isSingleByte()
    • isFixedWidth

      public final boolean isFixedWidth()
    • load

      public static Encoding load(String name)
    • load

      public static Encoding load(String name, String pkg)