Package org.languagetool.tokenizers.gl
Class GalicianWordTokenizer
- java.lang.Object
-
- org.languagetool.tokenizers.WordTokenizer
-
- org.languagetool.tokenizers.gl.GalicianWordTokenizer
-
- All Implemented Interfaces:
org.languagetool.tokenizers.Tokenizer
public class GalicianWordTokenizer extends org.languagetool.tokenizers.WordTokenizer
Tokenizes a sentence into words. Punctuation and whitespace each get their own token.
-
-
Field Summary
Fields (columns: Modifier and Type, Field, Description)
private static java.util.regex.Pattern
COLON_NUMBERS_PATTERN
private static java.lang.String
COLON_NUMBERS_REPL
private static java.util.regex.Pattern
DATE_PATTERN
private static java.lang.String
DATE_PATTERN_REPL
private static java.util.regex.Pattern
DECIMAL_COMMA_PATTERN
private static java.lang.String
DECIMAL_COMMA_REPL
private static char
DECIMAL_COMMA_SUBST
private static java.util.regex.Pattern
DECIMAL_SPACE_PATTERN
private static java.util.regex.Pattern
DOTTED_NUMBERS_PATTERN
private static java.lang.String
DOTTED_NUMBERS_REPL
private static java.util.regex.Pattern
DOTTED_ORDINALS_PATTERN
private static java.lang.String
DOTTED_ORDINALS_REPL
private static char
NON_BREAKING_COLON_SUBST
private static char
NON_BREAKING_DOT_SUBST
private static char
NON_BREAKING_SPACE_SUBST
private static java.lang.String
SPLIT_CHARS
-
Constructor Summary
Constructors (columns: Constructor, Description)
GalicianWordTokenizer()
-
Method Summary
Methods (tabs: All Methods, Instance Methods, Concrete Methods; columns: Modifier and Type, Method, Description)
java.util.List<java.lang.String>
tokenize(java.lang.String text)
-
-
-
Field Detail
-
SPLIT_CHARS
private static final java.lang.String SPLIT_CHARS
- See Also:
- Constant Field Values
-
DECIMAL_COMMA_SUBST
private static final char DECIMAL_COMMA_SUBST
- See Also:
- Constant Field Values
-
NON_BREAKING_SPACE_SUBST
private static final char NON_BREAKING_SPACE_SUBST
- See Also:
- Constant Field Values
-
NON_BREAKING_DOT_SUBST
private static final char NON_BREAKING_DOT_SUBST
- See Also:
- Constant Field Values
-
NON_BREAKING_COLON_SUBST
private static final char NON_BREAKING_COLON_SUBST
- See Also:
- Constant Field Values
-
DECIMAL_COMMA_PATTERN
private static final java.util.regex.Pattern DECIMAL_COMMA_PATTERN
-
DECIMAL_COMMA_REPL
private static final java.lang.String DECIMAL_COMMA_REPL
- See Also:
- Constant Field Values
-
DECIMAL_SPACE_PATTERN
private static final java.util.regex.Pattern DECIMAL_SPACE_PATTERN
-
DOTTED_NUMBERS_PATTERN
private static final java.util.regex.Pattern DOTTED_NUMBERS_PATTERN
-
DOTTED_NUMBERS_REPL
private static final java.lang.String DOTTED_NUMBERS_REPL
- See Also:
- Constant Field Values
-
COLON_NUMBERS_PATTERN
private static final java.util.regex.Pattern COLON_NUMBERS_PATTERN
-
COLON_NUMBERS_REPL
private static final java.lang.String COLON_NUMBERS_REPL
- See Also:
- Constant Field Values
-
DATE_PATTERN
private static final java.util.regex.Pattern DATE_PATTERN
-
DATE_PATTERN_REPL
private static final java.lang.String DATE_PATTERN_REPL
- See Also:
- Constant Field Values
-
DOTTED_ORDINALS_PATTERN
private static final java.util.regex.Pattern DOTTED_ORDINALS_PATTERN
-
DOTTED_ORDINALS_REPL
private static final java.lang.String DOTTED_ORDINALS_REPL
- See Also:
- Constant Field Values
-
-