/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2014, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_ML_HPP
#define OPENCV_ML_HPP

#ifdef __cplusplus
#  include "opencv2/core.hpp"
#endif

#ifdef __cplusplus

#include <float.h>
#include <map>
#include <iostream>

/**
  @defgroup ml Machine Learning

  The Machine Learning Library (MLL) is a set of classes and functions for statistical
  classification, regression, and clustering of data.

  Most of the classification and regression algorithms are implemented as C++ classes. As the
  algorithms have different sets of features (like an ability to handle missing measurements or
  categorical input variables), there is a little common ground between the classes. This common
  ground is defined by the class cv::ml::StatModel that all the other ML classes are derived from.

  See detailed overview here: @ref ml_intro.
 */

namespace cv
{

namespace ml
{

//! @addtogroup ml
//! @{

/** @brief Variable types */
enum VariableTypes
{
    VAR_NUMERICAL    =0, //!< same as VAR_ORDERED
    VAR_ORDERED      =0, //!< ordered variables
    VAR_CATEGORICAL  =1  //!< categorical variables
};

/** @brief %Error types */
enum ErrorTypes
{
    TEST_ERROR = 0,
    TRAIN_ERROR = 1
};

/** @brief Sample types */
enum SampleTypes
{
    ROW_SAMPLE = 0, //!< each training sample is a row of samples
    COL_SAMPLE = 1  //!< each training sample occupies a column of samples
};

/** @brief The structure represents the logarithmic grid range of statmodel parameters.

It is used for optimizing statmodel accuracy by varying model parameters, the accuracy estimate
being computed by cross-validation.
 */
class CV_EXPORTS_W ParamGrid
{
public:
    /** @brief Default constructor */
    ParamGrid();
    /** @brief Constructor with parameters */
    ParamGrid(double _minVal, double _maxVal, double _logStep);

    CV_PROP_RW double minVal; //!< Minimum value of the statmodel parameter. Default value is 0.
    CV_PROP_RW double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0.
    /** @brief Logarithmic step for iterating the statmodel parameter.

    The grid determines the following iteration sequence of the statmodel parameter values:
    \f[(minVal, minVal*logStep, minVal*{logStep}^2, \dots, minVal*{logStep}^n),\f]
    where \f$n\f$ is the maximal index satisfying
    \f[\texttt{minVal} * \texttt{logStep} ^n <  \texttt{maxVal}\f]
    The grid is logarithmic, so logStep must always be greater than 1. Default value is 1.
    */
    CV_PROP_RW double logStep;

    /** @brief Creates a ParamGrid Ptr that can be given to the %SVM::trainAuto method

    @param minVal minimum value of the parameter grid
    @param maxVal maximum value of the parameter grid
    @param logstep Logarithmic step for iterating the statmodel parameter
    */
    CV_WRAP static Ptr<ParamGrid> create(double minVal=0., double maxVal=0., double logstep=1.);
};
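
/* A minimal usage sketch of the iteration formula above: a grid created with
   minVal=1, maxVal=1000 and logStep=10 makes SVM::trainAuto try the parameter
   values 1, 10 and 100 (iteration stops before minVal*logStep^n reaches
   maxVal). The tuned parameter (C) is chosen purely for illustration.

       Ptr<ParamGrid> cGrid = ParamGrid::create(1., 1000., 10.);
       // values iterated by SVM::trainAuto for this grid: 1, 10, 100
*/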

/** @brief Class encapsulating training data.

Please note that the class only specifies the interface of training data, but not the
implementation. All the statistical model classes in the _ml_ module accept Ptr\<TrainData\> as a
parameter. In other words, you can create your own class derived from TrainData and pass a smart
pointer to an instance of this class into StatModel::train.

@sa @ref ml_intro_data
 */
class CV_EXPORTS_W TrainData
{
public:
    static inline float missingValue() { return FLT_MAX; }
    virtual ~TrainData();

    CV_WRAP virtual int getLayout() const = 0;
    CV_WRAP virtual int getNTrainSamples() const = 0;
    CV_WRAP virtual int getNTestSamples() const = 0;
    CV_WRAP virtual int getNSamples() const = 0;
    CV_WRAP virtual int getNVars() const = 0;
    CV_WRAP virtual int getNAllVars() const = 0;

    CV_WRAP virtual void getSample(InputArray varIdx, int sidx, float* buf) const = 0;
    CV_WRAP virtual Mat getSamples() const = 0;
    CV_WRAP virtual Mat getMissing() const = 0;

    /** @brief Returns matrix of train samples

    @param layout The requested layout. If it's different from the initial one, the matrix is
        transposed. See ml::SampleTypes.
    @param compressSamples if true, the function returns only the training samples (specified by
        sampleIdx)
    @param compressVars if true, the function returns the shorter training samples, containing only
        the active variables.

    In the current implementation the function tries to avoid physical data copying and returns the
    matrix stored inside TrainData (unless the transposition or compression is needed).
     */
    CV_WRAP virtual Mat getTrainSamples(int layout=ROW_SAMPLE,
                                bool compressSamples=true,
                                bool compressVars=true) const = 0;

    /** @brief Returns the vector of responses

    The function returns ordered or the original categorical responses. Usually it's used in
    regression algorithms.
     */
    CV_WRAP virtual Mat getTrainResponses() const = 0;

    /** @brief Returns the vector of normalized categorical responses

    The function returns a vector of responses. Each response is an integer from `0` to `<number of
    classes>-1`. The actual label value can then be retrieved from the class label vector, see
    TrainData::getClassLabels.
     */
    CV_WRAP virtual Mat getTrainNormCatResponses() const = 0;
    CV_WRAP virtual Mat getTestResponses() const = 0;
    CV_WRAP virtual Mat getTestNormCatResponses() const = 0;
    CV_WRAP virtual Mat getResponses() const = 0;
    CV_WRAP virtual Mat getNormCatResponses() const = 0;
    CV_WRAP virtual Mat getSampleWeights() const = 0;
    CV_WRAP virtual Mat getTrainSampleWeights() const = 0;
    CV_WRAP virtual Mat getTestSampleWeights() const = 0;
    CV_WRAP virtual Mat getVarIdx() const = 0;
    CV_WRAP virtual Mat getVarType() const = 0;
    CV_WRAP Mat getVarSymbolFlags() const;
    CV_WRAP virtual int getResponseType() const = 0;
    CV_WRAP virtual Mat getTrainSampleIdx() const = 0;
    CV_WRAP virtual Mat getTestSampleIdx() const = 0;
    CV_WRAP virtual void getValues(int vi, InputArray sidx, float* values) const = 0;
    virtual void getNormCatValues(int vi, InputArray sidx, int* values) const = 0;
    CV_WRAP virtual Mat getDefaultSubstValues() const = 0;

    CV_WRAP virtual int getCatCount(int vi) const = 0;

    /** @brief Returns the vector of class labels

    The function returns the vector of unique labels that occur in the responses.
     */
    CV_WRAP virtual Mat getClassLabels() const = 0;

    CV_WRAP virtual Mat getCatOfs() const = 0;
    CV_WRAP virtual Mat getCatMap() const = 0;

    /** @brief Splits the training data into the training and test parts
    @sa TrainData::setTrainTestSplitRatio
     */
    CV_WRAP virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;

    /** @brief Splits the training data into the training and test parts

    The function selects a subset of the specified relative size and then returns it as the
    training set. If the function is not called, all the data is used for training. Please note
    that for each of the TrainData::getTrain\* methods there is a corresponding
    TrainData::getTest\*, so that the test subset can be retrieved and processed as well.
    @sa TrainData::setTrainTestSplit
     */
    CV_WRAP virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
    CV_WRAP virtual void shuffleTrainTest() = 0;

    /** @brief Returns matrix of test samples */
    CV_WRAP Mat getTestSamples() const;

    /** @brief Returns vector of symbolic names captured in loadFromCSV() */
    CV_WRAP void getNames(std::vector<String>& names) const;

    CV_WRAP static Mat getSubVector(const Mat& vec, const Mat& idx);

    /** @brief Reads the dataset from a .csv file and returns the ready-to-use training data.

    @param filename The input file name
    @param headerLineCount The number of lines in the beginning to skip; besides the header, the
        function also skips empty lines and lines starting with `#`
    @param responseStartIdx Index of the first output variable. If -1, the function considers the
        last variable as the response
    @param responseEndIdx Index of the last output variable + 1. If -1, then there is a single
        response variable at responseStartIdx.
    @param varTypeSpec The optional text string that specifies the variables' types. It has the
        format `ord[n1-n2,n3,n4-n5,...]cat[n6,n7-n8,...]`. That is, variables from `n1 to n2`
        (inclusive range), `n3`, `n4 to n5` ... are considered ordered and `n6`, `n7 to n8` ... are
        considered as categorical. The range `[n1..n2] + [n3] + [n4..n5] + ... + [n6] + [n7..n8]`
        should cover all the variables. If varTypeSpec is not specified, then the algorithm uses the
        following rules:
        - all input variables are considered ordered by default. If some column contains non-
          numerical values, e.g. 'apple', 'pear', 'apple', 'apple', 'mango', the corresponding
          variable is considered categorical.
        - if there are several output variables, they are all considered as ordered. An error is
          reported when non-numerical values are used.
        - if there is a single output variable, then if its values are non-numerical or are all
          integers, then it's considered categorical. Otherwise, it's considered ordered.
    @param delimiter The character used to separate values in each line.
    @param missch The character used to specify missing measurements. It should not be a digit.
        Although it's a non-numerical value, it surely does not affect the decision of whether the
        variable is ordered or categorical.
    @note If the dataset only contains input variables and no responses, use responseStartIdx = -2
        and responseEndIdx = 0. The output variables vector will just contain zeros.
     */
    static Ptr<TrainData> loadFromCSV(const String& filename,
                                      int headerLineCount,
                                      int responseStartIdx=-1,
                                      int responseEndIdx=-1,
                                      const String& varTypeSpec=String(),
                                      char delimiter=',',
                                      char missch='?');
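
    /* A hedged example of the call above; the file name is illustrative. One
       header line is skipped and the last column is taken as the single
       response variable (the defaults).

           Ptr<TrainData> data = TrainData::loadFromCSV("data.csv", 1);
           data->setTrainTestSplitRatio(0.8, true); // hold out 20% for testing
    */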

    /** @brief Creates training data from in-memory arrays.

    @param samples matrix of samples. It should have CV_32F type.
    @param layout see ml::SampleTypes.
    @param responses matrix of responses. If the responses are scalar, they should be stored as a
        single row or as a single column. The matrix should have type CV_32F or CV_32S (in the
        former case the responses are considered as ordered by default; in the latter case - as
        categorical)
    @param varIdx vector specifying which variables to use for training. It can be an integer vector
        (CV_32S) containing 0-based variable indices or byte vector (CV_8U) containing a mask of
        active variables.
    @param sampleIdx vector specifying which samples to use for training. It can be an integer
        vector (CV_32S) containing 0-based sample indices or byte vector (CV_8U) containing a mask
        of training samples.
    @param sampleWeights optional vector with weights for each sample. It should have CV_32F type.
    @param varType optional vector of type CV_8U and size `<number_of_variables_in_samples> +
        <number_of_variables_in_responses>`, containing types of each input and output variable. See
        ml::VariableTypes.
     */
    CV_WRAP static Ptr<TrainData> create(InputArray samples, int layout, InputArray responses,
                                 InputArray varIdx=noArray(), InputArray sampleIdx=noArray(),
                                 InputArray sampleWeights=noArray(), InputArray varType=noArray());
};
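
/* A minimal sketch of building training data from in-memory arrays; the
   matrix sizes here are illustrative placeholders.

       Mat samples(100, 4, CV_32F);   // 100 row-samples with 4 variables each
       Mat responses(100, 1, CV_32S); // CV_32S => responses treated as categorical
       // ... fill samples and responses ...
       Ptr<TrainData> data = TrainData::create(samples, ROW_SAMPLE, responses);
*/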

/** @brief Base class for statistical models in OpenCV ML.
 */
class CV_EXPORTS_W StatModel : public Algorithm
{
public:
    /** Predict options */
    enum Flags {
        UPDATE_MODEL = 1,
        RAW_OUTPUT=1, //!< makes the method return the raw results (the sum), not the class label
        COMPRESSED_INPUT=2,
        PREPROCESSED_INPUT=4
    };

    /** @brief Returns the number of variables in training samples */
    CV_WRAP virtual int getVarCount() const = 0;

    CV_WRAP virtual bool empty() const CV_OVERRIDE;

    /** @brief Returns true if the model is trained */
    CV_WRAP virtual bool isTrained() const = 0;
    /** @brief Returns true if the model is a classifier */
    CV_WRAP virtual bool isClassifier() const = 0;

    /** @brief Trains the statistical model

    @param trainData training data that can be loaded from file using TrainData::loadFromCSV or
        created with TrainData::create.
    @param flags optional flags, depending on the model. Some of the models can be updated with the
        new training samples, not completely overwritten (such as NormalBayesClassifier or ANN_MLP).
     */
    CV_WRAP virtual bool train( const Ptr<TrainData>& trainData, int flags=0 );

    /** @brief Trains the statistical model

    @param samples training samples
    @param layout See ml::SampleTypes.
    @param responses vector of responses associated with the training samples.
    */
    CV_WRAP virtual bool train( InputArray samples, int layout, InputArray responses );

    /** @brief Computes error on the training or test dataset

    @param data the training data
    @param test if true, the error is computed over the test subset of the data, otherwise it's
        computed over the training subset of the data. Please note that if you loaded a completely
        different dataset to evaluate an already trained classifier, you will probably want to skip
        TrainData::setTrainTestSplitRatio and specify test=false, so that the error is computed for
        the whole new set.
    @param resp the optional output responses.

    The method uses StatModel::predict to compute the error. For regression models the error is
    computed as RMS, for classifiers - as a percent of misclassified samples (0%-100%).
     */
    CV_WRAP virtual float calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const;

    /** @brief Predicts response(s) for the provided sample(s)

    @param samples The input samples, floating-point matrix
    @param results The optional output matrix of results.
    @param flags The optional flags, model-dependent. See cv::ml::StatModel::Flags.
     */
    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;

    /** @brief Create and train model with default parameters

    The class must implement a static `create()` method with no parameters or with all default parameter values
    */
    template<typename _Tp> static Ptr<_Tp> train(const Ptr<TrainData>& data, int flags=0)
    {
        Ptr<_Tp> model = _Tp::create();
        return !model.empty() && model->train(data, flags) ? model : Ptr<_Tp>();
    }
};
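
/* A short sketch of the convenience template above, assuming `data` is a
   prepared Ptr<TrainData>; SVM (declared later in this header) is used purely
   to illustrate a model class with a parameterless create().

       Ptr<SVM> model = StatModel::train<SVM>(data); // create() + train() in one step
       float testErr = model.empty() ? -1.f : model->calcError(data, true, noArray());
*/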

/****************************************************************************************\
*                                 Normal Bayes Classifier                                *
\****************************************************************************************/

/** @brief Bayes classifier for normally distributed data.

@sa @ref ml_intro_bayes
 */
class CV_EXPORTS_W NormalBayesClassifier : public StatModel
{
public:
    /** @brief Predicts the response for sample(s).

    The method estimates the most probable classes for input vectors. Input vectors (one or more)
    are stored as rows of the matrix inputs. In the case of multiple input vectors, there should be
    one output vector outputs. The predicted class for a single input vector is returned by the
    method. The vector outputProbs contains the output probabilities corresponding to each element
    of result.
     */
    CV_WRAP virtual float predictProb( InputArray inputs, OutputArray outputs,
                               OutputArray outputProbs, int flags=0 ) const = 0;

    /** Creates empty model
    Use StatModel::train to train the model after creation. */
    CV_WRAP static Ptr<NormalBayesClassifier> create();

    /** @brief Loads and creates a serialized NormalBayesClassifier from a file
     *
     * Use NormalBayesClassifier::save to serialize and store a NormalBayesClassifier to disk.
     * Load the NormalBayesClassifier from this file again, by calling this function with the path to the file.
     * Optionally specify the node for the file containing the classifier
     *
     * @param filepath path to serialized NormalBayesClassifier
     * @param nodeName name of node containing the classifier
     */
    CV_WRAP static Ptr<NormalBayesClassifier> load(const String& filepath, const String& nodeName = String());
};

/****************************************************************************************\
*                          K-Nearest Neighbour Classifier                                *
\****************************************************************************************/

/** @brief The class implements K-Nearest Neighbors model

@sa @ref ml_intro_knn
 */
class CV_EXPORTS_W KNearest : public StatModel
{
public:

    /** Default number of neighbors to use in predict method. */
    /** @see setDefaultK */
    CV_WRAP virtual int getDefaultK() const = 0;
    /** @copybrief getDefaultK @see getDefaultK */
    CV_WRAP virtual void setDefaultK(int val) = 0;

    /** Whether classification or regression model should be trained. */
    /** @see setIsClassifier */
    CV_WRAP virtual bool getIsClassifier() const = 0;
    /** @copybrief getIsClassifier @see getIsClassifier */
    CV_WRAP virtual void setIsClassifier(bool val) = 0;

    /** Parameter for KDTree implementation. */
    /** @see setEmax */
    CV_WRAP virtual int getEmax() const = 0;
    /** @copybrief getEmax @see getEmax */
    CV_WRAP virtual void setEmax(int val) = 0;

    /** %Algorithm type, one of KNearest::Types. */
    /** @see setAlgorithmType */
    CV_WRAP virtual int getAlgorithmType() const = 0;
    /** @copybrief getAlgorithmType @see getAlgorithmType */
    CV_WRAP virtual void setAlgorithmType(int val) = 0;

    /** @brief Finds the neighbors and predicts responses for input vectors.

    @param samples Input samples stored by rows. It is a single-precision floating-point matrix of
        `<number_of_samples> * <number_of_features>` size.
    @param k Number of used nearest neighbors. Should be greater than 1.
    @param results Vector with results of prediction (regression or classification) for each input
        sample. It is a single-precision floating-point vector with `<number_of_samples>` elements.
    @param neighborResponses Optional output values for corresponding neighbors. It is a single-
        precision floating-point matrix of `<number_of_samples> * k` size.
    @param dist Optional output distances from the input vectors to the corresponding neighbors. It
        is a single-precision floating-point matrix of `<number_of_samples> * k` size.

    For each input vector (a row of the matrix samples), the method finds the k nearest neighbors.
    In case of regression, the predicted result is a mean value of the particular vector's neighbor
    responses. In case of classification, the class is determined by voting.

    For each input vector, the neighbors are sorted by their distances to the vector.

    In case of C++ interface you can use output pointers to empty matrices and the function will
    allocate memory itself.

    If only a single input vector is passed, all output matrices are optional and the predicted
    value is returned by the method.

    The function is parallelized with the TBB library.
     */
    CV_WRAP virtual float findNearest( InputArray samples, int k,
                               OutputArray results,
                               OutputArray neighborResponses=noArray(),
                               OutputArray dist=noArray() ) const = 0;
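
    /* A brief usage sketch on a trained model; `knn`, `nFeatures` and the
       query matrix are illustrative. Each query sample is one CV_32F row.

           Mat query(1, nFeatures, CV_32F);   // one sample to classify
           Mat results, neighbors, dists;
           float pred = knn->findNearest(query, 5, results, neighbors, dists);
           // pred: voted class label; neighbors and dists are 1 x 5 here
    */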

    /** @brief Implementations of KNearest algorithm
       */
    enum Types
    {
        BRUTE_FORCE=1,
        KDTREE=2
    };

    /** @brief Creates the empty model

    The static method creates an empty %KNearest classifier. It should then be trained using the StatModel::train method.
     */
    CV_WRAP static Ptr<KNearest> create();
};

/****************************************************************************************\
*                                   Support Vector Machines                              *
\****************************************************************************************/

/** @brief Support Vector Machines.

@sa @ref ml_intro_svm
 */
class CV_EXPORTS_W SVM : public StatModel
{
public:

    class CV_EXPORTS Kernel : public Algorithm
    {
    public:
        virtual int getType() const = 0;
        virtual void calc( int vcount, int n, const float* vecs, const float* another, float* results ) = 0;
    };

    /** Type of a %SVM formulation.
    See SVM::Types. Default value is SVM::C_SVC. */
    /** @see setType */
    CV_WRAP virtual int getType() const = 0;
    /** @copybrief getType @see getType */
    CV_WRAP virtual void setType(int val) = 0;

    /** Parameter \f$\gamma\f$ of a kernel function.
    For SVM::POLY, SVM::RBF, SVM::SIGMOID or SVM::CHI2. Default value is 1. */
    /** @see setGamma */
    CV_WRAP virtual double getGamma() const = 0;
    /** @copybrief getGamma @see getGamma */
    CV_WRAP virtual void setGamma(double val) = 0;

    /** Parameter _coef0_ of a kernel function.
    For SVM::POLY or SVM::SIGMOID. Default value is 0.*/
    /** @see setCoef0 */
    CV_WRAP virtual double getCoef0() const = 0;
    /** @copybrief getCoef0 @see getCoef0 */
    CV_WRAP virtual void setCoef0(double val) = 0;

    /** Parameter _degree_ of a kernel function.
    For SVM::POLY. Default value is 0. */
    /** @see setDegree */
    CV_WRAP virtual double getDegree() const = 0;
    /** @copybrief getDegree @see getDegree */
    CV_WRAP virtual void setDegree(double val) = 0;

    /** Parameter _C_ of a %SVM optimization problem.
    For SVM::C_SVC, SVM::EPS_SVR or SVM::NU_SVR. Default value is 0. */
    /** @see setC */
    CV_WRAP virtual double getC() const = 0;
    /** @copybrief getC @see getC */
    CV_WRAP virtual void setC(double val) = 0;

    /** Parameter \f$\nu\f$ of a %SVM optimization problem.
    For SVM::NU_SVC, SVM::ONE_CLASS or SVM::NU_SVR. Default value is 0. */
    /** @see setNu */
    CV_WRAP virtual double getNu() const = 0;
    /** @copybrief getNu @see getNu */
    CV_WRAP virtual void setNu(double val) = 0;

    /** Parameter \f$\epsilon\f$ of a %SVM optimization problem.
    For SVM::EPS_SVR. Default value is 0. */
    /** @see setP */
    CV_WRAP virtual double getP() const = 0;
    /** @copybrief getP @see getP */
    CV_WRAP virtual void setP(double val) = 0;

    /** Optional weights in the SVM::C_SVC problem, assigned to particular classes.
    They are multiplied by _C_ so the parameter _C_ of class _i_ becomes `classWeights(i) * C`. Thus
    these weights affect the misclassification penalty for different classes. The larger the weight,
    the larger the penalty on misclassification of data from the corresponding class. Default value
    is empty Mat. */
    /** @see setClassWeights */
    CV_WRAP virtual cv::Mat getClassWeights() const = 0;
    /** @copybrief getClassWeights @see getClassWeights */
    CV_WRAP virtual void setClassWeights(const cv::Mat &val) = 0;

    /** Termination criteria of the iterative %SVM training procedure which solves a partial
    case of the constrained quadratic optimization problem.
    You can specify tolerance and/or the maximum number of iterations. Default value is
    `TermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, FLT_EPSILON )`; */
    /** @see setTermCriteria */
    CV_WRAP virtual cv::TermCriteria getTermCriteria() const = 0;
    /** @copybrief getTermCriteria @see getTermCriteria */
    CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;

    /** Type of a %SVM kernel.
    See SVM::KernelTypes. Default value is SVM::RBF. */
    CV_WRAP virtual int getKernelType() const = 0;

    /** Initialize with one of the predefined kernels.
    See SVM::KernelTypes. */
    CV_WRAP virtual void setKernel(int kernelType) = 0;

    /** Initialize with a custom kernel.
    See the SVM::Kernel class for implementation details */
    virtual void setCustomKernel(const Ptr<Kernel> &_kernel) = 0;

    //! %SVM type
    enum Types {
        /** C-Support Vector Classification. n-class classification (n \f$\geq\f$ 2), allows
        imperfect separation of classes with penalty multiplier C for outliers. */
        C_SVC=100,
        /** \f$\nu\f$-Support Vector Classification. n-class classification with possible
        imperfect separation. Parameter \f$\nu\f$ (in the range 0..1, the larger the value, the smoother
        the decision boundary) is used instead of C. */
        NU_SVC=101,
        /** Distribution Estimation (One-class %SVM). All the training data are from
        the same class, %SVM builds a boundary that separates the class from the rest of the feature
        space. */
        ONE_CLASS=102,
        /** \f$\epsilon\f$-Support Vector Regression. The distance between feature vectors
        from the training set and the fitting hyper-plane must be less than p. For outliers the
        penalty multiplier C is used. */
        EPS_SVR=103,
        /** \f$\nu\f$-Support Vector Regression. \f$\nu\f$ is used instead of p.
        See @cite LibSVM for details. */
        NU_SVR=104
    };

    /** @brief %SVM kernel type

    A comparison of different kernels on the following 2D test case with four classes. Four
    SVM::C_SVC SVMs have been trained (one against rest) with auto_train. Evaluation on three
    different kernels (SVM::CHI2, SVM::INTER, SVM::RBF). The color depicts the class with max score.
    Bright means max-score \> 0, dark means max-score \< 0.
    ![image](pics/SVM_Comparison.png)
    */
    enum KernelTypes {
        /** Returned by SVM::getKernelType in case when custom kernel has been set */
        CUSTOM=-1,
        /** Linear kernel. No mapping is done, linear discrimination (or regression) is
        done in the original feature space. It is the fastest option. \f$K(x_i, x_j) = x_i^T x_j\f$. */
        LINEAR=0,
        /** Polynomial kernel:
        \f$K(x_i, x_j) = (\gamma x_i^T x_j + coef0)^{degree}, \gamma > 0\f$. */
        POLY=1,
        /** Radial basis function (RBF), a good choice in most cases.
        \f$K(x_i, x_j) = e^{-\gamma ||x_i - x_j||^2}, \gamma > 0\f$. */
        RBF=2,
        /** Sigmoid kernel: \f$K(x_i, x_j) = \tanh(\gamma x_i^T x_j + coef0)\f$. */
        SIGMOID=3,
        /** Exponential Chi2 kernel, similar to the RBF kernel:
        \f$K(x_i, x_j) = e^{-\gamma \chi^2(x_i,x_j)}, \chi^2(x_i,x_j) = (x_i-x_j)^2/(x_i+x_j), \gamma > 0\f$. */
        CHI2=4,
        /** Histogram intersection kernel. A fast kernel. \f$K(x_i, x_j) = min(x_i,x_j)\f$. */
        INTER=5
    };

    //! %SVM params type
    enum ParamTypes {
        C=0,
        GAMMA=1,
        P=2,
        NU=3,
        COEF=4,
        DEGREE=5
    };

    /** @brief Trains an %SVM with optimal parameters.

    @param data the training data that can be constructed using TrainData::create or
        TrainData::loadFromCSV.
    @param kFold Cross-validation parameter. The training set is divided into kFold subsets. One
        subset is used to test the model, the others form the train set. So, the %SVM algorithm is
        executed kFold times.
    @param Cgrid grid for C
    @param gammaGrid grid for gamma
    @param pGrid grid for p
    @param nuGrid grid for nu
    @param coeffGrid grid for coeff
    @param degreeGrid grid for degree
    @param balanced If true and the problem is 2-class classification then the method creates more
        balanced cross-validation subsets, that is, the proportions between classes in the subsets
        are close to the proportions in the whole train dataset.

    The method trains the %SVM model automatically by choosing the optimal parameters C, gamma, p,
    nu, coef0, degree. Parameters are considered optimal when the cross-validation
    estimate of the test set error is minimal.

    If there is no need to optimize a parameter, the corresponding grid step should be set to any
    value less than or equal to 1. For example, to avoid optimization in gamma, set
    `gammaGrid.logStep = 0` and `gammaGrid.minVal`, `gammaGrid.maxVal` to arbitrary numbers. In this
    case, the current value of `gamma` is used.

    And, finally, if the optimization in a parameter is required but the corresponding grid is
    unknown, you may call the function SVM::getDefaultGrid. To generate a grid, for example, for
    gamma, call `SVM::getDefaultGrid(SVM::GAMMA)`.

    This function works for the classification (SVM::C_SVC or SVM::NU_SVC) as well as for the
    regression (SVM::EPS_SVR or SVM::NU_SVR). If it is SVM::ONE_CLASS, no optimization is made and
    the usual %SVM with parameters specified in params is executed.
     */
    virtual bool trainAuto( const Ptr<TrainData>& data, int kFold = 10,
                    ParamGrid Cgrid = getDefaultGrid(C),
                    ParamGrid gammaGrid  = getDefaultGrid(GAMMA),
                    ParamGrid pGrid      = getDefaultGrid(P),
                    ParamGrid nuGrid     = getDefaultGrid(NU),
                    ParamGrid coeffGrid  = getDefaultGrid(COEF),
                    ParamGrid degreeGrid = getDefaultGrid(DEGREE),
                    bool balanced=false) = 0;

    /** @brief Trains an %SVM with optimal parameters

    @param samples training samples
    @param layout See ml::SampleTypes.
    @param responses vector of responses associated with the training samples.
    @param kFold Cross-validation parameter. The training set is divided into kFold subsets. One
        subset is used to test the model, the others form the train set. So, the %SVM algorithm is
        executed kFold times.
    @param Cgrid grid for C
    @param gammaGrid grid for gamma
    @param pGrid grid for p
    @param nuGrid grid for nu
    @param coeffGrid grid for coeff
    @param degreeGrid grid for degree
    @param balanced If true and the problem is 2-class classification then the method creates more
        balanced cross-validation subsets, that is, the proportions between classes in the subsets
        are close to the proportions in the whole train dataset.

    The method trains the %SVM model automatically by choosing the optimal parameters C, gamma, p,
    nu, coef0, degree. Parameters are considered optimal when the cross-validation
    estimate of the test set error is minimal.

    This function only makes use of SVM::getDefaultGrid for parameter optimization and thus only
    offers rudimentary parameter options.

    This function works for the classification (SVM::C_SVC or SVM::NU_SVC) as well as for the
    regression (SVM::EPS_SVR or SVM::NU_SVR). If it is SVM::ONE_CLASS, no optimization is made and
    the usual %SVM with parameters specified in params is executed.
    */
    CV_WRAP bool trainAuto(InputArray samples,
            int layout,
            InputArray responses,
            int kFold = 10,
            Ptr<ParamGrid> Cgrid = SVM::getDefaultGridPtr(SVM::C),
            Ptr<ParamGrid> gammaGrid  = SVM::getDefaultGridPtr(SVM::GAMMA),
            Ptr<ParamGrid> pGrid      = SVM::getDefaultGridPtr(SVM::P),
            Ptr<ParamGrid> nuGrid     = SVM::getDefaultGridPtr(SVM::NU),
            Ptr<ParamGrid> coeffGrid  = SVM::getDefaultGridPtr(SVM::COEF),
            Ptr<ParamGrid> degreeGrid = SVM::getDefaultGridPtr(SVM::DEGREE),
            bool balanced=false);
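
    /* A hedged sketch of this overload; `samples` (CV_32F) and `responses`
       (CV_32S) are assumed to be prepared matrices. Default grids are used
       for all parameters and 5-fold cross-validation is requested.

           Ptr<SVM> svm = SVM::create();
           svm->setType(SVM::C_SVC);
           svm->setKernel(SVM::RBF);
           svm->trainAuto(samples, ROW_SAMPLE, responses, 5);
    */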

    /** @brief Retrieves all the support vectors

    The method returns all the support vectors as a floating-point matrix, where support vectors are
    stored as matrix rows.
     */
    CV_WRAP virtual Mat getSupportVectors() const = 0;

    /** @brief Retrieves all the uncompressed support vectors of a linear %SVM

    The method returns all the uncompressed support vectors of a linear %SVM that the compressed
    support vector, used for prediction, was derived from. They are returned in a floating-point
    matrix, where the support vectors are stored as matrix rows.
     */
    CV_WRAP Mat getUncompressedSupportVectors() const;

    /** @brief Retrieves the decision function

    @param i the index of the decision function. If the problem solved is regression, 1-class or
        2-class classification, then there will be just one decision function and the index should
        always be 0. Otherwise, in the case of N-class classification, there will be \f$N(N-1)/2\f$
        decision functions.
    @param alpha the optional output vector for weights, corresponding to different support vectors.
        In the case of linear %SVM all the alpha's will be 1's.
    @param svidx the optional output vector of indices of support vectors within the matrix of
        support vectors (which can be retrieved by SVM::getSupportVectors). In the case of linear
        %SVM each decision function consists of a single "compressed" support vector.

    The method returns the rho parameter of the decision function, a scalar subtracted from the
    weighted sum of kernel responses.
     */
    CV_WRAP virtual double getDecisionFunction(int i, OutputArray alpha, OutputArray svidx) const = 0;

    /** @brief Generates a grid for %SVM parameters.

    @param param_id %SVM parameter ID that must be one of the SVM::ParamTypes. The grid is
    generated for the parameter with this ID.

    The function generates a grid for the specified parameter of the %SVM algorithm. The grid may be
    passed to the function SVM::trainAuto.
     */
    static ParamGrid getDefaultGrid( int param_id );

    /** @brief Generates a grid for %SVM parameters.

    @param param_id %SVM parameter ID that must be one of the SVM::ParamTypes. The grid is
    generated for the parameter with this ID.

    The function generates a grid pointer for the specified parameter of the %SVM algorithm.
    The grid may be passed to the function SVM::trainAuto.
     */
    CV_WRAP static Ptr<ParamGrid> getDefaultGridPtr( int param_id );

    /** Creates empty model.
    Use StatModel::train to train the model. Since %SVM has several parameters, you may want to
    find the best parameters for your problem; this can be done with SVM::trainAuto. */
    CV_WRAP static Ptr<SVM> create();

    /** @brief Loads and creates a serialized SVM from a file
     *
     * Use SVM::save to serialize and store an SVM to disk.
     * Load the SVM from this file again, by calling this function with the path to the file.
     *
     * @param filepath path to serialized SVM
     */
    CV_WRAP static Ptr<SVM> load(const String& filepath);
};
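
/* A minimal end-to-end sketch: train a C-SVC with an RBF kernel on prepared
   data and classify a single sample. All matrices here are illustrative.

       Ptr<SVM> svm = SVM::create();
       svm->setType(SVM::C_SVC);
       svm->setKernel(SVM::RBF);
       svm->setC(1.0);
       svm->setGamma(0.5);
       svm->train(trainSamples, ROW_SAMPLE, trainResponses); // CV_32F / CV_32S
       float label = svm->predict(testSample);               // 1 x nFeatures, CV_32F
*/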

/****************************************************************************************\
*                              Expectation - Maximization                                *
\****************************************************************************************/

/** @brief The class implements the Expectation Maximization algorithm.

@sa @ref ml_intro_em
 */
class CV_EXPORTS_W EM : public StatModel
{
public:
    //! Type of covariance matrices
    enum Types {
        /** A scaled identity matrix \f$\mu_k * I\f$. There is only one
        parameter \f$\mu_k\f$ to be estimated for each matrix. The option may be used in special cases,
        when the constraint is relevant, or as a first step in the optimization (for example in case
        when the data is preprocessed with PCA). The results of such preliminary estimation may be
        passed again to the optimization procedure, this time with
        covMatType=EM::COV_MAT_DIAGONAL. */
        COV_MAT_SPHERICAL=0,
        /** A diagonal matrix with positive diagonal elements. The number of
        free parameters is d for each matrix. This is the most commonly used option, yielding good
        estimation results. */
        COV_MAT_DIAGONAL=1,
        /** A symmetric positive-definite matrix. The number of free
        parameters in each matrix is about \f$d^2/2\f$. It is not recommended to use this option, unless
        there is a fairly accurate initial estimate of the parameters and/or a huge number of
        training samples. */
        COV_MAT_GENERIC=2,
        COV_MAT_DEFAULT=COV_MAT_DIAGONAL
    };

    //! Default parameters
    enum {DEFAULT_NCLUSTERS=5, DEFAULT_MAX_ITERS=100};

    //! The initial step
    enum {START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0};

    /** The number of mixture components in the Gaussian mixture model.
    Default value of the parameter is EM::DEFAULT_NCLUSTERS=5. Some %EM implementations could
    determine the optimal number of mixtures within a specified value range, but that is not the
    case in ML yet. */
    /** @see setClustersNumber */
    CV_WRAP virtual int getClustersNumber() const = 0;
    /** @copybrief getClustersNumber @see getClustersNumber */
    CV_WRAP virtual void setClustersNumber(int val) = 0;

    /** Constraint on covariance matrices which defines type of matrices.
    See EM::Types. */
    /** @see setCovarianceMatrixType */
    CV_WRAP virtual int getCovarianceMatrixType() const = 0;
    /** @copybrief getCovarianceMatrixType @see getCovarianceMatrixType */
    CV_WRAP virtual void setCovarianceMatrixType(int val) = 0;

    /** The termination criteria of the %EM algorithm.
    The %EM algorithm can be terminated by the number of iterations termCrit.maxCount (number of
    M-steps) or when the relative change of the likelihood logarithm is less than termCrit.epsilon.
    Default maximum number of iterations is EM::DEFAULT_MAX_ITERS=100. */
    /** @see setTermCriteria */
    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
    /** @copybrief getTermCriteria @see getTermCriteria */
    CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;

    /** @brief Returns weights of the mixtures

    Returns a vector with the number of elements equal to the number of mixtures.
     */
    CV_WRAP virtual Mat getWeights() const = 0;
    /** @brief Returns the cluster centers (means of the Gaussian mixture)

    Returns a matrix with the number of rows equal to the number of mixtures and the number of
    columns equal to the space dimensionality.
     */
    CV_WRAP virtual Mat getMeans() const = 0;
    /** @brief Returns covariance matrices

    Returns a vector of covariance matrices. The number of matrices is the number of Gaussian
    mixtures, each matrix is a square floating-point matrix NxN, where N is the space dimensionality.
     */
    CV_WRAP virtual void getCovs(CV_OUT std::vector<Mat>& covs) const = 0;

    /** @brief Returns posterior probabilities for the provided samples

    @param samples The input samples, floating-point matrix
    @param results The optional output \f$ nSamples \times nClusters\f$ matrix of results. It contains
    posterior probabilities for each sample from the input
    @param flags This parameter will be ignored
     */
    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const CV_OVERRIDE = 0;

    /** @brief Returns a likelihood logarithm value and an index of the most probable mixture component
    for the given sample.

    @param sample A sample for classification. It should be a one-channel matrix of
        \f$1 \times dims\f$ or \f$dims \times 1\f$ size.
    @param probs Optional output matrix that contains posterior probabilities of each component
        given the sample. It has \f$1 \times nclusters\f$ size and CV_64FC1 type.

    The method returns a two-element double vector. The zero element is the likelihood logarithm
    value for the sample. The first element is the index of the most probable mixture component for
    the given sample.
     */
    CV_WRAP virtual Vec2d predict2(InputArray sample, OutputArray probs) const = 0;

    /** @brief Estimate the Gaussian mixture parameters from a sample set.

    This variation starts with the Expectation step. Initial values of the model parameters will be
    estimated by the k-means algorithm.

    Unlike many of the ML models, %EM is an unsupervised learning algorithm and it does not take
    responses (class labels or function values) as input. Instead, it computes the *Maximum
    Likelihood Estimate* of the Gaussian mixture parameters from an input sample set, stores all the
    parameters inside the structure: \f$p_{i,k}\f$ in probs, \f$a_k\f$ in means , \f$S_k\f$ in
    covs[k], \f$\pi_k\f$ in weights , and optionally computes the output "class label" for each
    sample: \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most
    probable mixture component for each sample).

    The trained model can be used further for prediction, just like any other classifier. The
    trained model is similar to the NormalBayesClassifier.

    @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
        one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
        it will be converted to the inner matrix of such type for the further computing.
    @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
        each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
    @param labels The optional output "class label" for each sample:
        \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
        mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
    @param probs The optional output matrix that contains posterior probabilities of each Gaussian
        mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and
        CV_64FC1 type.
     */
    CV_WRAP virtual bool trainEM(InputArray samples,
                         OutputArray logLikelihoods=noArray(),
                         OutputArray labels=noArray(),
                         OutputArray probs=noArray()) = 0;

    /** @brief Estimate the Gaussian mixture parameters from a sample set.

    This variation starts with the Expectation step. You need to provide initial means \f$a_k\f$ of
    mixture components. Optionally you can pass initial weights \f$\pi_k\f$ and covariance matrices
    \f$S_k\f$ of mixture components.

    @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
        one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
        it will be converted to the inner matrix of such type for the further computing.
    @param means0 Initial means \f$a_k\f$ of mixture components. It is a one-channel matrix of
        \f$nclusters \times dims\f$ size. If the matrix does not have CV_64F type it will be
        converted to the inner matrix of such type for the further computing.
    @param covs0 The vector of initial covariance matrices \f$S_k\f$ of mixture components. Each of
        covariance matrices is a one-channel matrix of \f$dims \times dims\f$ size. If the matrices
        do not have CV_64F type they will be converted to the inner matrices of such type for the
        further computing.
    @param weights0 Initial weights \f$\pi_k\f$ of mixture components. It should be a one-channel
        floating-point matrix with \f$1 \times nclusters\f$ or \f$nclusters \times 1\f$ size.
    @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
        each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
    @param labels The optional output "class label" for each sample:
        \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
        mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
    @param probs The optional output matrix that contains posterior probabilities of each Gaussian
        mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and
        CV_64FC1 type.
    */
    CV_WRAP virtual bool trainE(InputArray samples, InputArray means0,
                        InputArray covs0=noArray(),
                        InputArray weights0=noArray(),
                        OutputArray logLikelihoods=noArray(),
                        OutputArray labels=noArray(),
                        OutputArray probs=noArray()) = 0;

    /** @brief Estimate the Gaussian mixture parameters from a sample set.

    This variation starts with the Maximization step. You need to provide initial probabilities
    \f$p_{i,k}\f$ to use this option.

    @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
        one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
        it will be converted to the inner matrix of such type for the further computing.
    @param probs0 Initial probabilities \f$p_{i,k}\f$ of sample \f$i\f$ belonging to mixture
        component \f$k\f$. It is a one-channel floating-point matrix of
        \f$nsamples \times nclusters\f$ size.
    @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
        each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
    @param labels The optional output "class label" for each sample:
        \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
        mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
    @param probs The optional output matrix that contains posterior probabilities of each Gaussian
        mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and
        CV_64FC1 type.
    */
    CV_WRAP virtual bool trainM(InputArray samples, InputArray probs0,
                        OutputArray logLikelihoods=noArray(),
                        OutputArray labels=noArray(),
                        OutputArray probs=noArray()) = 0;

    /** Creates empty %EM model.
    The model should then be trained using the StatModel::train(traindata, flags) method. Alternatively,
    you can use one of the EM::train\* methods or load it from file using Algorithm::load\<EM\>(filename).
     */
    CV_WRAP static Ptr<EM> create();

    /** @brief Loads and creates a serialized EM from a file
     *
     * Use EM::save to serialize and store an EM to disk.
     * Load the EM from this file again, by calling this function with the path to the file.
     * Optionally specify the node for the file containing the classifier
     *
     * @param filepath path to serialized EM
     * @param nodeName name of node containing the classifier
     */
    CV_WRAP static Ptr<EM> load(const String& filepath, const String& nodeName = String());
};
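
/* A short sketch of unsupervised GMM fitting with EM; `points` is an
   illustrative N x dims matrix of samples (converted to CV_64F internally).

       Ptr<EM> em = EM::create();
       em->setClustersNumber(3);
       em->setCovarianceMatrixType(EM::COV_MAT_DIAGONAL);
       Mat logLikelihoods, labels, probs;
       em->trainEM(points, logLikelihoods, labels, probs);
       Vec2d res = em->predict2(points.row(0), noArray());
       int component = cvRound(res[1]); // index of the most probable component
*/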
1020 
1021 /****************************************************************************************\
1022 *                                      Decision Tree                                     *
1023 \****************************************************************************************/
1024 
1025 /** @brief The class represents a single decision tree or a collection of decision trees.
1026 
1027 The current public interface of the class allows user to train only a single decision tree, however
1028 the class is capable of storing multiple decision trees and using them for prediction (by summing
1029 responses or using a voting schemes), and the derived from DTrees classes (such as RTrees and Boost)
1030 use this capability to implement decision tree ensembles.
1031 
1032 @sa @ref ml_intro_trees
1033 */
1034 class CV_EXPORTS_W DTrees : public StatModel
1035 {
1036 public:
1037     /** Predict options */
1038     enum Flags { PREDICT_AUTO=0, PREDICT_SUM=(1<<8), PREDICT_MAX_VOTE=(2<<8), PREDICT_MASK=(3<<8) };
1039 
1040     /** Cluster possible values of a categorical variable into K\<=maxCategories clusters to
1041     find a suboptimal split.
1042     If a discrete variable, on which the training procedure tries to make a split, takes more than
1043     maxCategories values, the precise best subset estimation may take a very long time because the
1044     algorithm is exponential. Instead, many decision trees engines (including our implementation)
1045     try to find sub-optimal split in this case by clustering all the samples into maxCategories
1046     clusters that is some categories are merged together. The clustering is applied only in n \>
1047     2-class classification problems for categorical variables with N \> max_categories possible
1048     values. In case of regression and 2-class classification the optimal split can be found
1049     efficiently without employing clustering, thus the parameter is not used in these cases.
1050     Default value is 10.*/
1051     /** @see setMaxCategories */
1052     CV_WRAP virtual int getMaxCategories() const = 0;
1053     /** @copybrief getMaxCategories @see getMaxCategories */
1054     CV_WRAP virtual void setMaxCategories(int val) = 0;
1055 
1056     /** The maximum possible depth of the tree.
1057     That is the training algorithms attempts to split a node while its depth is less than maxDepth.
1058     The root node has zero depth. The actual depth may be smaller if the other termination criteria
1059     are met (see the outline of the training procedure @ref ml_intro_trees "here"), and/or if the
1060     tree is pruned. Default value is INT_MAX.*/
1061     /** @see setMaxDepth */
1062     CV_WRAP virtual int getMaxDepth() const = 0;
1063     /** @copybrief getMaxDepth @see getMaxDepth */
1064     CV_WRAP virtual void setMaxDepth(int val) = 0;
1065 
1066     /** If the number of samples in a node is less than this parameter then the node will not be split.
1067 
1068     Default value is 10.*/
1069     /** @see setMinSampleCount */
1070     CV_WRAP virtual int getMinSampleCount() const = 0;
1071     /** @copybrief getMinSampleCount @see getMinSampleCount */
1072     CV_WRAP virtual void setMinSampleCount(int val) = 0;
1073 
1074     /** If CVFolds \> 1 then algorithms prunes the built decision tree using K-fold
1075     cross-validation procedure where K is equal to CVFolds.
1076     Default value is 10.*/
1077     /** @see setCVFolds */
1078     CV_WRAP virtual int getCVFolds() const = 0;
1079     /** @copybrief getCVFolds @see getCVFolds */
1080     CV_WRAP virtual void setCVFolds(int val) = 0;
1081 
1082     /** If true then surrogate splits will be built.
    These splits allow the algorithm to work with missing data and to compute variable importance correctly.
1084     Default value is false.
1085     @note currently it's not implemented.*/
1086     /** @see setUseSurrogates */
1087     CV_WRAP virtual bool getUseSurrogates() const = 0;
1088     /** @copybrief getUseSurrogates @see getUseSurrogates */
1089     CV_WRAP virtual void setUseSurrogates(bool val) = 0;
1090 
    /** If true then pruning will be harsher.
1092     This will make a tree more compact and more resistant to the training data noise but a bit less
1093     accurate. Default value is true.*/
1094     /** @see setUse1SERule */
1095     CV_WRAP virtual bool getUse1SERule() const = 0;
1096     /** @copybrief getUse1SERule @see getUse1SERule */
1097     CV_WRAP virtual void setUse1SERule(bool val) = 0;
1098 
1099     /** If true then pruned branches are physically removed from the tree.
1100     Otherwise they are retained and it is possible to get results from the original unpruned (or
1101     pruned less aggressively) tree. Default value is true.*/
1102     /** @see setTruncatePrunedTree */
1103     CV_WRAP virtual bool getTruncatePrunedTree() const = 0;
1104     /** @copybrief getTruncatePrunedTree @see getTruncatePrunedTree */
1105     CV_WRAP virtual void setTruncatePrunedTree(bool val) = 0;
1106 
1107     /** Termination criteria for regression trees.
    If all absolute differences between an estimated value in a node and the values of the train
    samples in this node are less than this parameter, then the node will not be split further.
    Default value is 0.01f.*/
1111     /** @see setRegressionAccuracy */
1112     CV_WRAP virtual float getRegressionAccuracy() const = 0;
1113     /** @copybrief getRegressionAccuracy @see getRegressionAccuracy */
1114     CV_WRAP virtual void setRegressionAccuracy(float val) = 0;
1115 
1116     /** @brief The array of a priori class probabilities, sorted by the class label value.
1117 
1118     The parameter can be used to tune the decision tree preferences toward a certain class. For
    example, if you want to detect some rare anomaly occurrence, the training base will likely
    contain many more normal cases than anomalies, so a very good classification performance
1121     will be achieved just by considering every case as normal. To avoid this, the priors can be
1122     specified, where the anomaly probability is artificially increased (up to 0.5 or even
1123     greater), so the weight of the misclassified anomalies becomes much bigger, and the tree is
1124     adjusted properly.
1125 
1126     You can also think about this parameter as weights of prediction categories which determine
1127     relative weights that you give to misclassification. That is, if the weight of the first
1128     category is 1 and the weight of the second category is 10, then each mistake in predicting
1129     the second category is equivalent to making 10 mistakes in predicting the first category.
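
    For example, a hedged sketch for a 2-class problem where each mistake on the second class
    should cost 10 times more (`dtree` is an assumed, already created DTrees model):
    @code
    cv::Mat priors = (cv::Mat_<float>(1, 2) << 1.f, 10.f);
    dtree->setPriors(priors);
    @endcode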
1130     Default value is empty Mat.*/
1131     /** @see setPriors */
1132     CV_WRAP virtual cv::Mat getPriors() const = 0;
1133     /** @copybrief getPriors @see getPriors */
1134     CV_WRAP virtual void setPriors(const cv::Mat &val) = 0;
1135 
1136     /** @brief The class represents a decision tree node.
1137      */
1138     class CV_EXPORTS Node
1139     {
1140     public:
1141         Node();
1142         double value; //!< Value at the node: a class label in case of classification or estimated
1143                       //!< function value in case of regression.
1144         int classIdx; //!< Class index normalized to 0..class_count-1 range and assigned to the
1145                       //!< node. It is used internally in classification trees and tree ensembles.
1146         int parent; //!< Index of the parent node
1147         int left; //!< Index of the left child node
1148         int right; //!< Index of right child node
1149         int defaultDir; //!< Default direction where to go (-1: left or +1: right). It helps in the
1150                         //!< case of missing values.
1151         int split; //!< Index of the first split
1152     };
1153 
    /** @brief The class represents a split in a decision tree.
1155      */
1156     class CV_EXPORTS Split
1157     {
1158     public:
1159         Split();
1160         int varIdx; //!< Index of variable on which the split is created.
1161         bool inversed; //!< If true, then the inverse split rule is used (i.e. left and right
1162                        //!< branches are exchanged in the rule expressions below).
1163         float quality; //!< The split quality, a positive number. It is used to choose the best split.
1164         int next; //!< Index of the next split in the list of splits for the node
1165         float c; /**< The threshold value in case of split on an ordered variable.
1166                       The rule is:
1167                       @code{.none}
1168                       if var_value < c
1169                         then next_node <- left
1170                         else next_node <- right
1171                       @endcode */
1172         int subsetOfs; /**< Offset of the bitset used by the split on a categorical variable.
1173                             The rule is:
1174                             @code{.none}
1175                             if bitset[var_value] == 1
1176                                 then next_node <- left
1177                                 else next_node <- right
1178                             @endcode */
1179     };
1180 
1181     /** @brief Returns indices of root nodes
1182     */
1183     virtual const std::vector<int>& getRoots() const = 0;
1184     /** @brief Returns all the nodes
1185 
1186     all the node indices are indices in the returned vector
1187      */
1188     virtual const std::vector<Node>& getNodes() const = 0;
1189     /** @brief Returns all the splits
1190 
1191     all the split indices are indices in the returned vector
1192      */
1193     virtual const std::vector<Split>& getSplits() const = 0;
1194     /** @brief Returns all the bitsets for categorical splits
1195 
1196     Split::subsetOfs is an offset in the returned vector
1197      */
1198     virtual const std::vector<int>& getSubsets() const = 0;
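
    /* A hedged traversal sketch using the accessors above: predicting with the first tree of a
       trained model `dtree` for a CV_32F row `sample` (both names are assumptions), handling only
       ordered-variable splits; categorical bitset splits via getSubsets() are omitted here.
    @code
    int nidx = dtree->getRoots()[0];
    const std::vector<DTrees::Node>& nodes = dtree->getNodes();
    const std::vector<DTrees::Split>& splits = dtree->getSplits();
    while( nodes[nidx].split >= 0 ) // a negative split index marks a leaf
    {
        const DTrees::Split& split = splits[nodes[nidx].split];
        bool goLeft = sample.at<float>(split.varIdx) < split.c; // ordered-variable rule
        if( split.inversed ) goLeft = !goLeft;
        nidx = goLeft ? nodes[nidx].left : nodes[nidx].right;
    }
    double prediction = nodes[nidx].value;
    @endcode
    */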
1199 
1200     /** @brief Creates the empty model
1201 
    The static method creates an empty decision tree with default parameters. It should then be
    trained using the train method (see StatModel::train). Alternatively, you can load the model
    from a file using Algorithm::load\<DTrees\>(filename).
1205      */
1206     CV_WRAP static Ptr<DTrees> create();
1207 
1208     /** @brief Loads and creates a serialized DTrees from a file
1209      *
     * Use DTrees::save to serialize and store a DTrees model to disk.
     * Load the model from this file again by calling this function with the path to the file.
     * Optionally specify the node in the file containing the classifier.
     *
     * @param filepath path to the serialized DTrees model
     * @param nodeName name of the node containing the classifier
1216      */
    CV_WRAP static Ptr<DTrees> load(const String& filepath, const String& nodeName = String());
1218 };
1219 
1220 /****************************************************************************************\
1221 *                                   Random Trees Classifier                              *
1222 \****************************************************************************************/
1223 
1224 /** @brief The class implements the random forest predictor.
1225 
1226 @sa @ref ml_intro_rtrees
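
A minimal usage sketch (assuming pre-filled CV_32F `samples` and CV_32S `responses` matrices with
one row per sample; both names are placeholders):
@code
cv::Ptr<RTrees> rtrees = RTrees::create();
rtrees->setCalculateVarImportance(true);
rtrees->setActiveVarCount(0); // 0: use the square root of the feature count at each node
rtrees->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 100, 0.01));
rtrees->train(TrainData::create(samples, ROW_SAMPLE, responses));
cv::Mat importance = rtrees->getVarImportance();
@endcode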
1227  */
1228 class CV_EXPORTS_W RTrees : public DTrees
1229 {
1230 public:
1231 
1232     /** If true then variable importance will be calculated and then it can be retrieved by RTrees::getVarImportance.
1233     Default value is false.*/
1234     /** @see setCalculateVarImportance */
1235     CV_WRAP virtual bool getCalculateVarImportance() const = 0;
1236     /** @copybrief getCalculateVarImportance @see getCalculateVarImportance */
1237     CV_WRAP virtual void setCalculateVarImportance(bool val) = 0;
1238 
    /** The size of the randomly selected subset of features at each tree node that is used
    to find the best split(s).
1241     If you set it to 0 then the size will be set to the square root of the total number of
1242     features. Default value is 0.*/
1243     /** @see setActiveVarCount */
1244     CV_WRAP virtual int getActiveVarCount() const = 0;
1245     /** @copybrief getActiveVarCount @see getActiveVarCount */
1246     CV_WRAP virtual void setActiveVarCount(int val) = 0;
1247 
1248     /** The termination criteria that specifies when the training algorithm stops.
1249     Either when the specified number of trees is trained and added to the ensemble or when
    sufficient accuracy (measured as OOB error) is achieved. Typically the more trees you have, the
    better the accuracy. However, the improvement in accuracy generally diminishes and asymptotes
    past a certain number of trees. Also keep in mind that the number of trees increases the
    prediction time linearly. Default value is TermCriteria(TermCriteria::MAX_ITER +
    TermCriteria::EPS, 50, 0.1).*/
1255     /** @see setTermCriteria */
1256     CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
1257     /** @copybrief getTermCriteria @see getTermCriteria */
1258     CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;
1259 
1260     /** Returns the variable importance array.
1261     The method returns the variable importance vector, computed at the training stage when
1262     CalculateVarImportance is set to true. If this flag was set to false, the empty matrix is
1263     returned.
1264      */
1265     CV_WRAP virtual Mat getVarImportance() const = 0;
1266 
1267     /** Returns the result of each individual tree in the forest.
    If the model solves a regression problem, the method returns each tree's result for each of
    the sample cases. If the model is a classifier, it returns a Mat with (number of samples) + 1
    rows, where the first row gives the class labels and the following rows give the votes each
    class received for each sample.
1272         @param samples Array containing the samples for which votes will be calculated.
1273         @param results Array where the result of the calculation will be written.
1274         @param flags Flags for defining the type of RTrees.
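
        A hedged sketch (assuming a trained classifier `rtrees` and a CV_32F `samples` matrix):
        @code
        cv::Mat votes;
        rtrees->getVotes(samples, votes, 0);
        // votes row 0: class labels; row 1 + i: votes each class received for sample i
        @endcode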
1275     */
1276     CV_WRAP void getVotes(InputArray samples, OutputArray results, int flags) const;
1277 
1278     /** Creates the empty model.
    Use StatModel::train to train the model, or Algorithm::load\<RTrees\>(filename) to load a
    pre-trained model.
1281      */
1282     CV_WRAP static Ptr<RTrees> create();
1283 
1284     /** @brief Loads and creates a serialized RTree from a file
1285      *
     * Use RTrees::save to serialize and store an RTrees model to disk.
     * Load the model from this file again by calling this function with the path to the file.
     * Optionally specify the node in the file containing the classifier.
     *
     * @param filepath path to the serialized RTrees model
     * @param nodeName name of the node containing the classifier
1292      */
    CV_WRAP static Ptr<RTrees> load(const String& filepath, const String& nodeName = String());
1294 };
1295 
1296 /****************************************************************************************\
1297 *                                   Boosted tree classifier                              *
1298 \****************************************************************************************/
1299 
1300 /** @brief Boosted tree classifier derived from DTrees
1301 
1302 @sa @ref ml_intro_boost
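
A minimal usage sketch (assuming pre-filled CV_32F `samples` and CV_32S `labels` matrices with one
row per sample; both names are placeholders):
@code
cv::Ptr<Boost> boost = Boost::create();
boost->setBoostType(Boost::GENTLE);
boost->setWeakCount(100);
boost->setWeightTrimRate(0.95);
boost->train(TrainData::create(samples, ROW_SAMPLE, labels));
@endcode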
1303  */
1304 class CV_EXPORTS_W Boost : public DTrees
1305 {
1306 public:
1307     /** Type of the boosting algorithm.
1308     See Boost::Types. Default value is Boost::REAL. */
1309     /** @see setBoostType */
1310     CV_WRAP virtual int getBoostType() const = 0;
1311     /** @copybrief getBoostType @see getBoostType */
1312     CV_WRAP virtual void setBoostType(int val) = 0;
1313 
1314     /** The number of weak classifiers.
1315     Default value is 100. */
1316     /** @see setWeakCount */
1317     CV_WRAP virtual int getWeakCount() const = 0;
1318     /** @copybrief getWeakCount @see getWeakCount */
1319     CV_WRAP virtual void setWeakCount(int val) = 0;
1320 
    /** A threshold between 0 and 1 used to save computational time.
    Samples with summary weight \f$\leq 1 - \texttt{weight\_trim\_rate}\f$ do not participate in the
    *next* iteration of training. Set this parameter to 0 to turn off this functionality. Default
    value is 0.95.*/
1324     /** @see setWeightTrimRate */
1325     CV_WRAP virtual double getWeightTrimRate() const = 0;
1326     /** @copybrief getWeightTrimRate @see getWeightTrimRate */
1327     CV_WRAP virtual void setWeightTrimRate(double val) = 0;
1328 
1329     /** Boosting type.
1330     Gentle AdaBoost and Real AdaBoost are often the preferable choices. */
1331     enum Types {
1332         DISCRETE=0, //!< Discrete AdaBoost.
1333         REAL=1, //!< Real AdaBoost. It is a technique that utilizes confidence-rated predictions
1334                 //!< and works well with categorical data.
1335         LOGIT=2, //!< LogitBoost. It can produce good regression fits.
1336         GENTLE=3 //!< Gentle AdaBoost. It puts less weight on outlier data points and for that
1337                  //!<reason is often good with regression data.
                 //!< reason is often good with regression data.
1339 
1340     /** Creates the empty model.
1341     Use StatModel::train to train the model, Algorithm::load\<Boost\>(filename) to load the pre-trained model. */
1342     CV_WRAP static Ptr<Boost> create();
1343 
1344     /** @brief Loads and creates a serialized Boost from a file
1345      *
     * Use Boost::save to serialize and store a Boost model to disk.
     * Load the model from this file again by calling this function with the path to the file.
     * Optionally specify the node in the file containing the classifier.
     *
     * @param filepath path to the serialized Boost model
     * @param nodeName name of the node containing the classifier
1352      */
    CV_WRAP static Ptr<Boost> load(const String& filepath, const String& nodeName = String());
1354 };
1355 
1356 /****************************************************************************************\
1357 *                                   Gradient Boosted Trees                               *
1358 \****************************************************************************************/
1359 
1360 /*class CV_EXPORTS_W GBTrees : public DTrees
1361 {
1362 public:
1363     struct CV_EXPORTS_W_MAP Params : public DTrees::Params
1364     {
1365         CV_PROP_RW int weakCount;
1366         CV_PROP_RW int lossFunctionType;
1367         CV_PROP_RW float subsamplePortion;
1368         CV_PROP_RW float shrinkage;
1369 
1370         Params();
1371         Params( int lossFunctionType, int weakCount, float shrinkage,
1372                 float subsamplePortion, int maxDepth, bool useSurrogates );
1373     };
1374 
1375     enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};
1376 
1377     virtual void setK(int k) = 0;
1378 
1379     virtual float predictSerial( InputArray samples,
1380                                  OutputArray weakResponses, int flags) const = 0;
1381 
1382     static Ptr<GBTrees> create(const Params& p);
1383 };*/
1384 
1385 /****************************************************************************************\
1386 *                              Artificial Neural Networks (ANN)                          *
1387 \****************************************************************************************/
1388 
1389 /////////////////////////////////// Multi-Layer Perceptrons //////////////////////////////
1390 
1391 /** @brief Artificial Neural Networks - Multi-Layer Perceptrons.
1392 
1393 Unlike many other models in ML that are constructed and trained at once, in the MLP model these
1394 steps are separated. First, a network with the specified topology is created using the non-default
1395 constructor or the method ANN_MLP::create. All the weights are set to zeros. Then, the network is
1396 trained using a set of input and output vectors. The training procedure can be repeated more than
1397 once, that is, the weights can be adjusted based on the new training data.
1398 
1399 Additional flags for StatModel::train are available: ANN_MLP::TrainFlags.
1400 
1401 @sa @ref ml_intro_ann
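
A minimal usage sketch (assuming pre-filled CV_32F `samples` and `targets` matrices with one row
per sample; both names are placeholders):
@code
cv::Ptr<ANN_MLP> mlp = ANN_MLP::create();
cv::Mat layerSizes = (cv::Mat_<int>(1, 3) << 2, 8, 1); // 2 inputs, 8 hidden neurons, 1 output
mlp->setLayerSizes(layerSizes);
mlp->setActivationFunction(ANN_MLP::SIGMOID_SYM, 1, 1);
mlp->setTrainMethod(ANN_MLP::BACKPROP, 0.1, 0.1);
mlp->train(TrainData::create(samples, ROW_SAMPLE, targets));
@endcode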
1402  */
1403 class CV_EXPORTS_W ANN_MLP : public StatModel
1404 {
1405 public:
1406     /** Available training methods */
1407     enum TrainingMethods {
1408         BACKPROP=0, //!< The back-propagation algorithm.
1409         RPROP = 1, //!< The RPROP algorithm. See @cite RPROP93 for details.
1410         ANNEAL = 2 //!< The simulated annealing algorithm. See @cite Kirkpatrick83 for details.
1411     };
1412 
1413     /** Sets training method and common parameters.
1414     @param method Default value is ANN_MLP::RPROP. See ANN_MLP::TrainingMethods.
1415     @param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP and to initialT for ANN_MLP::ANNEAL.
1416     @param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP and to finalT for ANN_MLP::ANNEAL.
1417     */
1418     CV_WRAP virtual void setTrainMethod(int method, double param1 = 0, double param2 = 0) = 0;
1419 
1420     /** Returns current training method */
1421     CV_WRAP virtual int getTrainMethod() const = 0;
1422 
1423     /** Initialize the activation function for each neuron.
1424     Currently the default and the only fully supported activation function is ANN_MLP::SIGMOID_SYM.
1425     @param type The type of activation function. See ANN_MLP::ActivationFunctions.
1426     @param param1 The first parameter of the activation function, \f$\alpha\f$. Default value is 0.
1427     @param param2 The second parameter of the activation function, \f$\beta\f$. Default value is 0.
1428     */
1429     CV_WRAP virtual void setActivationFunction(int type, double param1 = 0, double param2 = 0) = 0;
1430 
1431     /**  Integer vector specifying the number of neurons in each layer including the input and output layers.
1432     The very first element specifies the number of elements in the input layer.
    The last element specifies the number of elements in the output layer. Default value is empty Mat.
1434     @sa getLayerSizes */
1435     CV_WRAP virtual void setLayerSizes(InputArray _layer_sizes) = 0;
1436 
1437     /**  Integer vector specifying the number of neurons in each layer including the input and output layers.
1438     The very first element specifies the number of elements in the input layer.
    The last element specifies the number of elements in the output layer.
1440     @sa setLayerSizes */
1441     CV_WRAP virtual cv::Mat getLayerSizes() const = 0;
1442 
1443     /** Termination criteria of the training algorithm.
1444     You can specify the maximum number of iterations (maxCount) and/or how much the error could
1445     change between the iterations to make the algorithm continue (epsilon). Default value is
1446     TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01).*/
1447     /** @see setTermCriteria */
1448     CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
1449     /** @copybrief getTermCriteria @see getTermCriteria */
1450     CV_WRAP virtual void setTermCriteria(TermCriteria val) = 0;
1451 
1452     /** BPROP: Strength of the weight gradient term.
1453     The recommended value is about 0.1. Default value is 0.1.*/
1454     /** @see setBackpropWeightScale */
1455     CV_WRAP virtual double getBackpropWeightScale() const = 0;
1456     /** @copybrief getBackpropWeightScale @see getBackpropWeightScale */
1457     CV_WRAP virtual void setBackpropWeightScale(double val) = 0;
1458 
1459     /** BPROP: Strength of the momentum term (the difference between weights on the 2 previous iterations).
1460     This parameter provides some inertia to smooth the random fluctuations of the weights. It can
1461     vary from 0 (the feature is disabled) to 1 and beyond. The value 0.1 or so is good enough.
1462     Default value is 0.1.*/
1463     /** @see setBackpropMomentumScale */
1464     CV_WRAP virtual double getBackpropMomentumScale() const = 0;
1465     /** @copybrief getBackpropMomentumScale @see getBackpropMomentumScale */
1466     CV_WRAP virtual void setBackpropMomentumScale(double val) = 0;
1467 
1468     /** RPROP: Initial value \f$\Delta_0\f$ of update-values \f$\Delta_{ij}\f$.
1469     Default value is 0.1.*/
1470     /** @see setRpropDW0 */
1471     CV_WRAP virtual double getRpropDW0() const = 0;
1472     /** @copybrief getRpropDW0 @see getRpropDW0 */
1473     CV_WRAP virtual void setRpropDW0(double val) = 0;
1474 
1475     /** RPROP: Increase factor \f$\eta^+\f$.
1476     It must be \>1. Default value is 1.2.*/
1477     /** @see setRpropDWPlus */
1478     CV_WRAP virtual double getRpropDWPlus() const = 0;
1479     /** @copybrief getRpropDWPlus @see getRpropDWPlus */
1480     CV_WRAP virtual void setRpropDWPlus(double val) = 0;
1481 
1482     /** RPROP: Decrease factor \f$\eta^-\f$.
1483     It must be \<1. Default value is 0.5.*/
1484     /** @see setRpropDWMinus */
1485     CV_WRAP virtual double getRpropDWMinus() const = 0;
1486     /** @copybrief getRpropDWMinus @see getRpropDWMinus */
1487     CV_WRAP virtual void setRpropDWMinus(double val) = 0;
1488 
1489     /** RPROP: Update-values lower limit \f$\Delta_{min}\f$.
1490     It must be positive. Default value is FLT_EPSILON.*/
1491     /** @see setRpropDWMin */
1492     CV_WRAP virtual double getRpropDWMin() const = 0;
1493     /** @copybrief getRpropDWMin @see getRpropDWMin */
1494     CV_WRAP virtual void setRpropDWMin(double val) = 0;
1495 
1496     /** RPROP: Update-values upper limit \f$\Delta_{max}\f$.
1497     It must be \>1. Default value is 50.*/
1498     /** @see setRpropDWMax */
1499     CV_WRAP virtual double getRpropDWMax() const = 0;
1500     /** @copybrief getRpropDWMax @see getRpropDWMax */
1501     CV_WRAP virtual void setRpropDWMax(double val) = 0;
1502 
    /** ANNEAL: Initial temperature.
    It must be \>=0. Default value is 10.*/
1505     /** @see setAnnealInitialT */
1506     CV_WRAP double getAnnealInitialT() const;
1507     /** @copybrief getAnnealInitialT @see getAnnealInitialT */
1508     CV_WRAP void setAnnealInitialT(double val);
1509 
    /** ANNEAL: Final temperature.
    It must be \>=0 and less than initialT. Default value is 0.1.*/
1512     /** @see setAnnealFinalT */
1513     CV_WRAP double getAnnealFinalT() const;
1514     /** @copybrief getAnnealFinalT @see getAnnealFinalT */
1515     CV_WRAP void setAnnealFinalT(double val);
1516 
    /** ANNEAL: Cooling ratio.
    It must be \>0 and less than 1. Default value is 0.95.*/
1519     /** @see setAnnealCoolingRatio */
1520     CV_WRAP double getAnnealCoolingRatio() const;
1521     /** @copybrief getAnnealCoolingRatio @see getAnnealCoolingRatio */
1522     CV_WRAP void setAnnealCoolingRatio(double val);
1523 
    /** ANNEAL: Number of iterations per temperature changing step.
    It must be \>0. Default value is 10.*/
1526     /** @see setAnnealItePerStep */
1527     CV_WRAP int getAnnealItePerStep() const;
1528     /** @copybrief getAnnealItePerStep @see getAnnealItePerStep */
1529     CV_WRAP void setAnnealItePerStep(int val);
1530 
1531     /** @brief Set/initialize anneal RNG */
1532     void setAnnealEnergyRNG(const RNG& rng);
1533 
1534     /** possible activation functions */
1535     enum ActivationFunctions {
1536         /** Identity function: \f$f(x)=x\f$ */
1537         IDENTITY = 0,
1538         /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x})\f$
1539         @note
1540         If you are using the default sigmoid activation function with the default parameter values
1541         fparam1=0 and fparam2=0 then the function used is y = 1.7159\*tanh(2/3 \* x), so the output
1542         will range from [-1.7159, 1.7159], instead of [0,1].*/
1543         SIGMOID_SYM = 1,
1544         /** Gaussian function: \f$f(x)=\beta e^{-\alpha x*x}\f$ */
1545         GAUSSIAN = 2,
1546         /** ReLU function: \f$f(x)=max(0,x)\f$ */
1547         RELU = 3,
        /** Leaky ReLU function: \f$f(x)=x\f$ for \f$x>0\f$ and \f$f(x)=\alpha x\f$ for \f$x \leq 0\f$ */
1549         LEAKYRELU= 4
1550     };
1551 
1552     /** Train options */
1553     enum TrainFlags {
1554         /** Update the network weights, rather than compute them from scratch. In the latter case
1555         the weights are initialized using the Nguyen-Widrow algorithm. */
1556         UPDATE_WEIGHTS = 1,
1557         /** Do not normalize the input vectors. If this flag is not set, the training algorithm
1558         normalizes each input feature independently, shifting its mean value to 0 and making the
        standard deviation equal to 1. If the network is assumed to be updated frequently, the new
        training data could be very different from the original data. In this case, you should take
        care of proper normalization. */
1562         NO_INPUT_SCALE = 2,
1563         /** Do not normalize the output vectors. If the flag is not set, the training algorithm
1564         normalizes each output feature independently, by transforming it to the certain range
1565         depending on the used activation function. */
1566         NO_OUTPUT_SCALE = 4
1567     };
1568 
1569     CV_WRAP virtual Mat getWeights(int layerIdx) const = 0;
1570 
1571     /** @brief Creates empty model
1572 
1573     Use StatModel::train to train the model, Algorithm::load\<ANN_MLP\>(filename) to load the pre-trained model.
1574     Note that the train method has optional flags: ANN_MLP::TrainFlags.
1575      */
1576     CV_WRAP static Ptr<ANN_MLP> create();
1577 
1578     /** @brief Loads and creates a serialized ANN from a file
1579      *
     * Use ANN_MLP::save to serialize and store an ANN_MLP model to disk.
     * Load the model from this file again by calling this function with the path to the file.
     *
     * @param filepath path to the serialized ANN_MLP model
1584      */
1585     CV_WRAP static Ptr<ANN_MLP> load(const String& filepath);
1586 
1587 };
1588 
1589 /****************************************************************************************\
1590 *                           Logistic Regression                                          *
1591 \****************************************************************************************/
1592 
1593 /** @brief Implements Logistic Regression classifier.
1594 
1595 @sa @ref ml_intro_lr
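
A minimal usage sketch (assuming pre-filled CV_32F `samples` and CV_32F `labels` matrices with one
row per sample; both names are placeholders):
@code
cv::Ptr<LogisticRegression> lr = LogisticRegression::create();
lr->setLearningRate(0.001);
lr->setIterations(100);
lr->setRegularization(LogisticRegression::REG_L2);
lr->setTrainMethod(LogisticRegression::BATCH);
lr->train(samples, ROW_SAMPLE, labels);
cv::Mat predictions;
lr->predict(samples, predictions); // predictions: CV_32S column of predicted labels
@endcode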
1596  */
1597 class CV_EXPORTS_W LogisticRegression : public StatModel
1598 {
1599 public:
1600 
1601     /** Learning rate. */
1602     /** @see setLearningRate */
1603     CV_WRAP virtual double getLearningRate() const = 0;
1604     /** @copybrief getLearningRate @see getLearningRate */
1605     CV_WRAP virtual void setLearningRate(double val) = 0;
1606 
1607     /** Number of iterations. */
1608     /** @see setIterations */
1609     CV_WRAP virtual int getIterations() const = 0;
1610     /** @copybrief getIterations @see getIterations */
1611     CV_WRAP virtual void setIterations(int val) = 0;
1612 
1613     /** Kind of regularization to be applied. See LogisticRegression::RegKinds. */
1614     /** @see setRegularization */
1615     CV_WRAP virtual int getRegularization() const = 0;
1616     /** @copybrief getRegularization @see getRegularization */
1617     CV_WRAP virtual void setRegularization(int val) = 0;
1618 
1619     /** Kind of training method used. See LogisticRegression::Methods. */
1620     /** @see setTrainMethod */
1621     CV_WRAP virtual int getTrainMethod() const = 0;
1622     /** @copybrief getTrainMethod @see getTrainMethod */
1623     CV_WRAP virtual void setTrainMethod(int val) = 0;
1624 
1625     /** Specifies the number of training samples taken in each step of Mini-Batch Gradient
1626     Descent. Will only be used if using LogisticRegression::MINI_BATCH training algorithm. It
1627     has to take values less than the total number of training samples. */
1628     /** @see setMiniBatchSize */
1629     CV_WRAP virtual int getMiniBatchSize() const = 0;
1630     /** @copybrief getMiniBatchSize @see getMiniBatchSize */
1631     CV_WRAP virtual void setMiniBatchSize(int val) = 0;
1632 
1633     /** Termination criteria of the algorithm. */
1634     /** @see setTermCriteria */
1635     CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
1636     /** @copybrief getTermCriteria @see getTermCriteria */
1637     CV_WRAP virtual void setTermCriteria(TermCriteria val) = 0;
1638 
1639     //! Regularization kinds
1640     enum RegKinds {
1641         REG_DISABLE = -1, //!< Regularization disabled
1642         REG_L1 = 0, //!< %L1 norm
1643         REG_L2 = 1 //!< %L2 norm
1644     };
1645 
1646     //! Training methods
1647     enum Methods {
1648         BATCH = 0,
1649         MINI_BATCH = 1 //!< Set MiniBatchSize to a positive integer when using this method.
1650     };
1651 
1652     /** @brief Predicts responses for input samples and returns a float type.
1653 
1654     @param samples The input data for the prediction algorithm. Matrix [m x n], where each row
1655         contains variables (features) of one object being classified. Should have data type CV_32F.
1656     @param results Predicted labels as a column matrix of type CV_32S.
1657     @param flags Not used.
1658      */
1659     CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const CV_OVERRIDE = 0;
1660 
1661     /** @brief This function returns the trained parameters arranged across rows.
1662 
    For a two-class classification problem, it returns a row matrix. It returns the learnt
    parameters of the Logistic Regression as a matrix of type CV_32F.
1665      */
1666     CV_WRAP virtual Mat get_learnt_thetas() const = 0;
1667 
1668     /** @brief Creates empty model.
1669 
    Creates a Logistic Regression model with default parameters that can then be adjusted with the setters above.
1671      */
1672     CV_WRAP static Ptr<LogisticRegression> create();
1673 
1674     /** @brief Loads and creates a serialized LogisticRegression from a file
1675      *
     * Use LogisticRegression::save to serialize and store a LogisticRegression model to disk.
     * Load the model from this file again by calling this function with the path to the file.
     * Optionally specify the node in the file containing the classifier.
     *
     * @param filepath path to the serialized LogisticRegression model
     * @param nodeName name of the node containing the classifier
1682      */
    CV_WRAP static Ptr<LogisticRegression> load(const String& filepath, const String& nodeName = String());
1684 };
1685 
1686 
1687 /****************************************************************************************\
1688 *                        Stochastic Gradient Descent SVM Classifier                      *
1689 \****************************************************************************************/
1690 
1691 /*!
1692 @brief Stochastic Gradient Descent SVM classifier
1693 
1694 SVMSGD provides a fast and easy-to-use implementation of the SVM classifier using the Stochastic Gradient Descent approach,
1695 as presented in @cite bottou2010large.
1696 
The classifier has the following parameters:
1698 - model type,
1699 - margin type,
1700 - margin regularization (\f$\lambda\f$),
1701 - initial step size (\f$\gamma_0\f$),
1702 - step decreasing power (\f$c\f$),
1703 - and termination criteria.
1704 
1705 The model type may have one of the following values: \ref SGD and \ref ASGD.
1706 
- \ref SGD is the classic version of the SVMSGD classifier: each successive step is calculated by the formula
1708   \f[w_{t+1} = w_t - \gamma(t) \frac{dQ_i}{dw} |_{w = w_t}\f]
1709   where
1710   - \f$w_t\f$ is the weights vector for decision function at step \f$t\f$,
1711   - \f$\gamma(t)\f$ is the step size of model parameters at the iteration \f$t\f$, it is decreased on each step by the formula
1712     \f$\gamma(t) = \gamma_0  (1 + \lambda  \gamma_0 t) ^ {-c}\f$
1713   - \f$Q_i\f$ is the target functional from SVM task for sample with number \f$i\f$, this sample is chosen stochastically on each step of the algorithm.
1714 
- \ref ASGD is the Average Stochastic Gradient Descent SVM classifier. The ASGD classifier averages the weights vector on each step of the algorithm by the formula
  \f$\widehat{w}_{t+1} = \frac{t}{1+t}\widehat{w}_{t} + \frac{1}{1+t}w_{t+1}\f$
1717 
1718 The recommended model type is ASGD (following @cite bottou2010large).
1719 
1720 The margin type may have one of the following values: \ref SOFT_MARGIN or \ref HARD_MARGIN.
1721 
- You should use the \ref HARD_MARGIN type if you have linearly separable sets.
- You should use the \ref SOFT_MARGIN type if you have non-linearly separable sets or sets with outliers.
- In the general case (if you know nothing about the linear separability of your sets), use SOFT_MARGIN.
1725 
1726 The other parameters may be described as follows:
- The margin regularization parameter is responsible for decreasing the weights at each step and for the strength of the restrictions on outliers
  (the smaller the parameter, the smaller the probability that an outlier will be ignored).
  The recommended value for the SGD model is 0.0001 and for the ASGD model 0.00001.
1730 
1731 - Initial step size parameter is the initial value for the step size \f$\gamma(t)\f$.
1732   You will have to find the best initial step for your problem.
1733 
- The step decreasing power is the power parameter for the \f$\gamma(t)\f$ decrease formula mentioned above.
  The recommended value for the SGD model is 1 and for the ASGD model 0.75.
1736 
1737 - Termination criteria can be TermCriteria::COUNT, TermCriteria::EPS or TermCriteria::COUNT + TermCriteria::EPS.
1738   You will have to find the best termination criteria for your problem.
1739 
1740 Note that the parameters margin regularization, initial step size, and step decreasing power should be positive.
1741 
1742 To use SVMSGD algorithm do as follows:
1743 
- first, create the SVMSGD object. The algorithm will set optimal parameters by default, but you can set your own parameters via functions setSvmsgdType(),
  setMarginType(), setMarginRegularization(), setInitialStepSize(), and setStepDecreasingPower().
1746 
- then the SVM model can be trained using the train features and the corresponding labels by the method train().
1748 
1749 - after that, the label of a new feature vector can be predicted using the method predict().
1750 
1751 @code
1752 // Create empty object
1753 cv::Ptr<SVMSGD> svmsgd = SVMSGD::create();
1754 
1755 // Train the Stochastic Gradient Descent SVM
1756 svmsgd->train(trainData);
1757 
1758 // Predict labels for the new samples
1759 svmsgd->predict(samples, responses);
1760 @endcode
1761 
1762 */
1763 
1764 class CV_EXPORTS_W SVMSGD : public cv::ml::StatModel
1765 {
1766 public:
1767 
1768     /** SVMSGD type.
1769     ASGD is often the preferable choice. */
1770     enum SvmsgdType
1771     {
1772         SGD, //!< Stochastic Gradient Descent
1773         ASGD //!< Average Stochastic Gradient Descent
1774     };
1775 
1776     /** Margin type.*/
1777     enum MarginType
1778     {
        SOFT_MARGIN, //!< General case, suits the case of non-linearly separable sets, allows outliers.
1780         HARD_MARGIN  //!< More accurate for the case of linearly separable sets.
1781     };
1782 
1783     /**
1784      * @return the weights of the trained model (decision function f(x) = weights * x + shift).
1785     */
1786     CV_WRAP virtual Mat getWeights() = 0;
1787 
1788     /**
1789      * @return the shift of the trained model (decision function f(x) = weights * x + shift).
1790     */
1791     CV_WRAP virtual float getShift() = 0;
1792 
1793     /** @brief Creates empty model.
1794      * Use StatModel::train to train the model. Since %SVMSGD has several parameters, you may want to
1795      * find the best parameters for your problem or use setOptimalParameters() to set some default parameters.
1796     */
1797     CV_WRAP static Ptr<SVMSGD> create();
1798 
1799     /** @brief Loads and creates a serialized SVMSGD from a file
1800      *
1801      * Use SVMSGD::save to serialize and store an SVMSGD to disk.
     * Load the SVMSGD from this file again by calling this function with the path to the file.
     * Optionally specify the node in the file containing the classifier.
     *
     * @param filepath path to the serialized SVMSGD model
     * @param nodeName name of the node containing the classifier
1807      */
    CV_WRAP static Ptr<SVMSGD> load(const String& filepath, const String& nodeName = String());
1809 
1810     /** @brief Function sets optimal parameters values for chosen SVM SGD model.
1811      * @param svmsgdType is the type of SVMSGD classifier.
1812      * @param marginType is the type of margin constraint.
1813     */
1814     CV_WRAP virtual void setOptimalParameters(int svmsgdType = SVMSGD::ASGD, int marginType = SVMSGD::SOFT_MARGIN) = 0;
1815 
1816     /** @brief %Algorithm type, one of SVMSGD::SvmsgdType. */
1817     /** @see setSvmsgdType */
1818     CV_WRAP virtual int getSvmsgdType() const = 0;
1819     /** @copybrief getSvmsgdType @see getSvmsgdType */
1820     CV_WRAP virtual void setSvmsgdType(int svmsgdType) = 0;
1821 
1822     /** @brief %Margin type, one of SVMSGD::MarginType. */
1823     /** @see setMarginType */
1824     CV_WRAP virtual int getMarginType() const = 0;
1825     /** @copybrief getMarginType @see getMarginType */
1826     CV_WRAP virtual void setMarginType(int marginType) = 0;
1827 
1828     /** @brief Parameter marginRegularization of a %SVMSGD optimization problem. */
1829     /** @see setMarginRegularization */
1830     CV_WRAP virtual float getMarginRegularization() const = 0;
1831     /** @copybrief getMarginRegularization @see getMarginRegularization */
1832     CV_WRAP virtual void setMarginRegularization(float marginRegularization) = 0;
1833 
1834     /** @brief Parameter initialStepSize of a %SVMSGD optimization problem. */
1835     /** @see setInitialStepSize */
1836     CV_WRAP virtual float getInitialStepSize() const = 0;
1837     /** @copybrief getInitialStepSize @see getInitialStepSize */
    CV_WRAP virtual void setInitialStepSize(float initialStepSize) = 0;
1839 
1840     /** @brief Parameter stepDecreasingPower of a %SVMSGD optimization problem. */
1841     /** @see setStepDecreasingPower */
1842     CV_WRAP virtual float getStepDecreasingPower() const = 0;
1843     /** @copybrief getStepDecreasingPower @see getStepDecreasingPower */
1844     CV_WRAP virtual void setStepDecreasingPower(float stepDecreasingPower) = 0;
1845 
1846     /** @brief Termination criteria of the training algorithm.
1847     You can specify the maximum number of iterations (maxCount) and/or how much the error could
1848     change between the iterations to make the algorithm continue (epsilon).*/
1849     /** @see setTermCriteria */
1850     CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
1851     /** @copybrief getTermCriteria @see getTermCriteria */
1852     CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;
1853 };
1854 
1855 
1856 /****************************************************************************************\
1857 *                           Auxiliary functions declarations                              *
1858 \****************************************************************************************/
1859 
/** @brief Generates _samples_ from a multivariate normal distribution

@param mean an average row vector
@param cov symmetric covariance matrix
1864 @param nsamples returned samples count
1865 @param samples returned samples array
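
A minimal sketch (drawing 1000 samples from a 2-D standard normal distribution):
@code
cv::Mat mean = (cv::Mat_<float>(1, 2) << 0.f, 0.f);
cv::Mat cov  = cv::Mat::eye(2, 2, CV_32F);
cv::Mat samples;
randMVNormal(mean, cov, 1000, samples); // samples: one generated row vector per sample
@endcode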
1866 */
1867 CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, OutputArray samples);
1868 
1869 /** @brief Creates test set */
1870 CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses,
1871                                                 OutputArray samples, OutputArray responses);
1872 
1873 /** @brief Artificial Neural Networks - Multi-Layer Perceptrons.
1874 
1875 @sa @ref ml_intro_ann
1876 */
1877 class CV_EXPORTS_W ANN_MLP_ANNEAL : public ANN_MLP
1878 {
1879 public:
    /** ANNEAL: Initial temperature.
    It must be \>=0. Default value is 10.*/
    /** @see setAnnealInitialT */
1881     CV_WRAP virtual double getAnnealInitialT() const = 0;
1882     /** @copybrief getAnnealInitialT @see getAnnealInitialT */
1883     CV_WRAP virtual void setAnnealInitialT(double val) = 0;
1884 
    /** ANNEAL: Final temperature.
    It must be \>=0 and less than initialT. Default value is 0.1.*/
    /** @see setAnnealFinalT */
    CV_WRAP virtual double getAnnealFinalT() const = 0;
    /** @copybrief getAnnealFinalT @see getAnnealFinalT */
    CV_WRAP virtual void setAnnealFinalT(double val) = 0;
1891 
    /** ANNEAL: Cooling ratio.
    It must be \>0 and less than 1. Default value is 0.95.*/
    /** @see setAnnealCoolingRatio */
    CV_WRAP virtual double getAnnealCoolingRatio() const = 0;
    /** @copybrief getAnnealCoolingRatio @see getAnnealCoolingRatio */
    CV_WRAP virtual void setAnnealCoolingRatio(double val) = 0;
1898 
    /** ANNEAL: Number of iterations per temperature changing step.
    It must be \>0. Default value is 10.*/
1901     /** @see setAnnealItePerStep */
1902     CV_WRAP virtual int getAnnealItePerStep() const = 0;
1903     /** @copybrief getAnnealItePerStep @see getAnnealItePerStep */
1904     CV_WRAP virtual void setAnnealItePerStep(int val) = 0;
1905 
1906     /** @brief Set/initialize anneal RNG */
1907     virtual void setAnnealEnergyRNG(const RNG& rng) = 0;
1908 };
1909 
1910 
1911 /****************************************************************************************\
1912 *                                   Simulated annealing solver                             *
1913 \****************************************************************************************/
1914 
1915 #ifdef CV_DOXYGEN
/** @brief This class declares an example interface for the system state used in the simulated annealing optimization algorithm.

@note This class is not defined in C++ code and can't be used directly - you need your own implementation with the same methods.
1919 */
1920 struct SimulatedAnnealingSolverSystem
1921 {
    /** Gives the energy value for the current state of the system.*/
1923     double energy() const;
    /** Changes the state of the system (random perturbation).*/
1925     void changeState();
    /** Reverts to the previous state. Can be called only once, and only after changeState(). */
1927     void reverseState();
1928 };
1929 #endif // CV_DOXYGEN
1930 
1931 /** @brief The class implements simulated annealing for optimization.
1932 
See @cite Kirkpatrick83 for details.
1934 
1935 @param solverSystem optimization system (see SimulatedAnnealingSolverSystem)
1936 @param initialTemperature initial temperature
1937 @param finalTemperature final temperature
@param coolingRatio temperature step multiplier
1939 @param iterationsPerStep number of iterations per temperature changing step
1940 @param lastTemperature optional output for last used temperature
@param rngEnergy custom random number generator (cv::theRNG() by default)
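
A minimal sketch of a conforming system (a hypothetical `QuadraticSystem` that minimizes
\f$f(x)=x^2\f$ by random perturbations; all names in it are illustrative, not part of the API):
@code
struct QuadraticSystem
{
    double x = 5.0, prev = 5.0;
    cv::RNG rng;
    double energy() const { return x * x; }                       // value to minimize
    void changeState() { prev = x; x += rng.uniform(-1.0, 1.0); } // random perturbation
    void reverseState() { x = prev; }                             // undo the last perturbation
};

QuadraticSystem system;
double lastTemperature = 0;
simulatedAnnealingSolver(system, 10.0, 0.1, 0.95, 100, &lastTemperature);
@endcode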
1942 */
1943 template<class SimulatedAnnealingSolverSystem>
1944 int simulatedAnnealingSolver(SimulatedAnnealingSolverSystem& solverSystem,
1945      double initialTemperature, double finalTemperature, double coolingRatio,
1946      size_t iterationsPerStep,
1947      CV_OUT double* lastTemperature = NULL,
1948      cv::RNG& rngEnergy = cv::theRNG()
1949 );
1950 
1951 //! @} ml
1952 
1953 }
1954 }
1955 
1956 #include <opencv2/ml/ml.inl.hpp>
1957 
1958 #endif // __cplusplus
1959 #endif // OPENCV_ML_HPP
1960 
1961 /* End of file. */
1962