13#ifndef MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP
14#define MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP
55template<
typename FitnessFunction = GiniImpurity,
56 template<
typename>
class NumericSplitType =
58 template<
typename>
class CategoricalSplitType =
59 HoeffdingCategoricalSplit
93 template<
typename MatType>
96 const arma::Row<size_t>& labels,
97 const size_t numClasses,
98 const bool batchTraining =
true,
99 const double successProbability = 0.95,
100 const size_t maxSamples = 0,
101 const size_t checkInterval = 100,
102 const size_t minSamples = 100,
103 const CategoricalSplitType<FitnessFunction>& categoricalSplitIn
104 = CategoricalSplitType<FitnessFunction>(0, 0),
105 const NumericSplitType<FitnessFunction>& numericSplitIn =
106 NumericSplitType<FitnessFunction>(0));
131 const size_t numClasses,
132 const double successProbability = 0.95,
133 const size_t maxSamples = 0,
134 const size_t checkInterval = 100,
135 const size_t minSamples = 100,
136 const CategoricalSplitType<FitnessFunction>& categoricalSplitIn
137 = CategoricalSplitType<FitnessFunction>(0, 0),
138 const NumericSplitType<FitnessFunction>& numericSplitIn =
139 NumericSplitType<FitnessFunction>(0),
140 std::unordered_map<
size_t, std::pair<size_t, size_t>>*
141 dimensionMappings = NULL,
142 const bool copyDatasetInfo =
true);
171 template<
typename MatType>
173 const arma::Row<size_t>& labels,
174 const bool batchTraining =
true);
180 template<
typename MatType>
183 const arma::Row<size_t>& labels,
184 const bool batchTraining =
true);
192 template<
typename VecType>
193 void Train(
const VecType& point,
const size_t label);
250 template<
typename VecType>
260 template<
typename VecType>
277 template<
typename VecType>
278 void Classify(
const VecType& point,
size_t& prediction,
double& probability)
288 template<
typename MatType>
289 void Classify(
const MatType& data, arma::Row<size_t>& predictions)
const;
302 template<
typename MatType>
304 arma::Row<size_t>& predictions,
305 arma::rowvec& probabilities)
const;
313 template<
typename Archive>
320 std::vector<NumericSplitType<FitnessFunction>> numericSplits;
322 std::vector<CategoricalSplitType<FitnessFunction>> categoricalSplits;
325 std::unordered_map<size_t, std::pair<size_t, size_t>>* dimensionMappings;
336 size_t checkInterval;
344 double successProbability;
349 size_t splitDimension;
351 size_t majorityClass;
354 double majorityProbability;
356 typename CategoricalSplitType<FitnessFunction>::SplitInfo categoricalSplit;
358 typename NumericSplitType<FitnessFunction>::SplitInfo numericSplit;
360 std::vector<HoeffdingTree*> children;
366#include "hoeffding_tree_impl.hpp"
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the data...
The HoeffdingTree object represents all of the necessary information for a Hoeffding-bound-based decision tree.
void Train(const MatType &data, const data::DatasetInfo &info, const arma::Row< size_t > &labels, const bool batchTraining=true)
Train on a set of points, either in streaming mode or in batch mode, with the given labels and the given DatasetInfo.
size_t NumChildren() const
Get the number of children.
void Classify(const MatType &data, arma::Row< size_t > &predictions, arma::rowvec &probabilities) const
Classify the given points, using this node and the entire (sub)tree beneath it.
size_t CheckInterval() const
Get the number of samples before a split check is performed.
void MinSamples(const size_t minSamples)
Modify the minimum number of samples for a split.
size_t NumDescendants() const
Get the size of the Hoeffding Tree.
~HoeffdingTree()
Clean up memory.
size_t MajorityClass() const
Get the majority class.
HoeffdingTree & Child(const size_t i)
Modify a child.
HoeffdingTree(const MatType &data, const data::DatasetInfo &datasetInfo, const arma::Row< size_t > &labels, const size_t numClasses, const bool batchTraining=true, const double successProbability=0.95, const size_t maxSamples=0, const size_t checkInterval=100, const size_t minSamples=100, const CategoricalSplitType< FitnessFunction > &categoricalSplitIn=CategoricalSplitType< FitnessFunction >(0, 0), const NumericSplitType< FitnessFunction > &numericSplitIn=NumericSplitType< FitnessFunction >(0))
Construct the Hoeffding tree with the given parameters and given training data.
void SuccessProbability(const double successProbability)
Modify the confidence required for a split.
void Train(const VecType &point, const size_t label)
Train on a single point in streaming mode, with the given label.
HoeffdingTree(const HoeffdingTree &other)
Copy another tree (warning: this will duplicate the tree entirely, and may use a lot of memory).
size_t MaxSamples() const
Get the maximum number of samples before a split is forced.
void Train(const MatType &data, const arma::Row< size_t > &labels, const bool batchTraining=true)
Train on a set of points, either in streaming mode or in batch mode, with the given labels.
double MajorityProbability() const
Get the probability of the majority class (based on training samples).
void MaxSamples(const size_t maxSamples)
Modify the maximum number of samples before a split is forced.
double & MajorityProbability()
Modify the probability of the majority class.
void CheckInterval(const size_t checkInterval)
Modify the number of samples before a split check is performed.
size_t & MajorityClass()
Modify the majority class.
void Classify(const VecType &point, size_t &prediction, double &probability) const
Classify the given point and also return an estimate of the probability that the prediction is correct.
size_t SplitCheck()
Check if a split would satisfy the conditions of the Hoeffding bound with the node's specified success probability.
size_t SplitDimension() const
Get the splitting dimension (size_t(-1) if no split).
size_t CalculateDirection(const VecType &point) const
Given a point and that this node is not a leaf, calculate the index of the child node this point would go to.
size_t Classify(const VecType &point) const
Classify the given point, using this node and the entire (sub)tree beneath it.
void CreateChildren()
Given that this node should split, create the children.
NumericSplitType< FitnessFunction > NumericSplit
Allow access to the numeric split type.
HoeffdingTree()
Construct a Hoeffding tree with no data and no information.
const HoeffdingTree & Child(const size_t i) const
Get a child.
void Classify(const MatType &data, arma::Row< size_t > &predictions) const
Classify the given points, using this node and the entire (sub)tree beneath it.
CategoricalSplitType< FitnessFunction > CategoricalSplit
Allow access to the categorical split type.
HoeffdingTree(const data::DatasetInfo &datasetInfo, const size_t numClasses, const double successProbability=0.95, const size_t maxSamples=0, const size_t checkInterval=100, const size_t minSamples=100, const CategoricalSplitType< FitnessFunction > &categoricalSplitIn=CategoricalSplitType< FitnessFunction >(0, 0), const NumericSplitType< FitnessFunction > &numericSplitIn=NumericSplitType< FitnessFunction >(0), std::unordered_map< size_t, std::pair< size_t, size_t > > *dimensionMappings=NULL, const bool copyDatasetInfo=true)
Construct the Hoeffding tree with the given parameters, but training on no data.
double SuccessProbability() const
Get the confidence required for a split.
void serialize(Archive &ar, const unsigned int)
Serialize the tree.
size_t MinSamples() const
Get the minimum number of samples for a split.
HoeffdingNumericSplit< FitnessFunction, double > HoeffdingDoubleNumericSplit
Convenience typedef.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.