Package org.languagetool.rules.ngrams
Class LanguageModelUtils
- java.lang.Object
-
- org.languagetool.rules.ngrams.LanguageModelUtils
-
public final class LanguageModelUtils extends java.lang.Object
-
-
Field Summary
Fields Modifier and Type Field Description private static org.slf4j.Logger
logger
-
Constructor Summary
Constructors Modifier Constructor Description private
LanguageModelUtils()
-
Method Summary
All Methods Static Methods Concrete Methods Modifier and Type Method Description static double
get3gramProbabilityFor(Language lang, LanguageModel lm, int position, AnalyzedSentence sentence, java.lang.String candidate)
(package private) static double
get3gramProbabilityFor(Language lang, LanguageModel lm, GoogleToken token, java.util.List<GoogleToken> tokens, java.lang.String term)
static double
get4gramProbabilityFor(Language lang, LanguageModel lm, int position, AnalyzedSentence sentence, java.lang.String candidate)
(package private) static double
get4gramProbabilityFor(Language lang, LanguageModel lm, GoogleToken token, java.util.List<GoogleToken> tokens, java.lang.String term)
(package private) static java.util.List<java.lang.String>
getContext(GoogleToken token, java.util.List<GoogleToken> tokens, java.lang.String newToken, int toLeft, int toRight)
(package private) static java.util.List<java.lang.String>
getContext(GoogleToken token, java.util.List<GoogleToken> tokens, java.util.List<GoogleToken> newTokens, int toLeft, int toRight)
static <T> java.util.List<T>
getContext(T token, java.util.List<T> tokens, java.util.List<T> newTokens, int toLeft, int toRight, java.util.function.Predicate<T> isWhitespace, T endToken)
(package private) static Tokenizer
getGoogleStyleWordTokenizer(Language language)
Returns a tokenizer that splits text more like Google does for its n-gram index (Google's tokenization scheme doesn't seem to be properly documented).
-
-
-
Method Detail
-
getGoogleStyleWordTokenizer
static Tokenizer getGoogleStyleWordTokenizer(Language language)
Returns a tokenizer that splits text more like Google does for its n-gram index (Google's tokenization scheme doesn't seem to be properly documented).
-
getContext
static java.util.List<java.lang.String> getContext(GoogleToken token, java.util.List<GoogleToken> tokens, java.lang.String newToken, int toLeft, int toRight)
-
getContext
static java.util.List<java.lang.String> getContext(GoogleToken token, java.util.List<GoogleToken> tokens, java.util.List<GoogleToken> newTokens, int toLeft, int toRight)
-
getContext
public static <T> java.util.List<T> getContext(T token, java.util.List<T> tokens, java.util.List<T> newTokens, int toLeft, int toRight, java.util.function.Predicate<T> isWhitespace, T endToken)
-
get3gramProbabilityFor
public static double get3gramProbabilityFor(Language lang, LanguageModel lm, int position, AnalyzedSentence sentence, java.lang.String candidate)
-
get4gramProbabilityFor
public static double get4gramProbabilityFor(Language lang, LanguageModel lm, int position, AnalyzedSentence sentence, java.lang.String candidate)
-
get3gramProbabilityFor
static double get3gramProbabilityFor(Language lang, LanguageModel lm, GoogleToken token, java.util.List<GoogleToken> tokens, java.lang.String term)
-
get4gramProbabilityFor
static double get4gramProbabilityFor(Language lang, LanguageModel lm, GoogleToken token, java.util.List<GoogleToken> tokens, java.lang.String term)
-
-