1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of the copyright holders may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41 
42 #ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
43 #define OPENCV_DNN_DNN_ALL_LAYERS_HPP
44 #include <opencv2/dnn.hpp>
45 
46 namespace cv {
47 namespace dnn {
48 CV__DNN_EXPERIMENTAL_NS_BEGIN
49 //! @addtogroup dnn
50 //! @{
51 
52 /** @defgroup dnnLayerList Partial List of Implemented Layers
53   @{
  This subsection of the dnn module describes the built-in layers.

  The classes listed here provide the C++ API for creating instances of built-in layers.
  In addition to this way of instantiating layers, there is a more general factory API (see @ref dnnLayerFactory) that allows creating layers dynamically (by name) and registering new ones.
  You can use both APIs, but the factory API is less convenient for native C++ programming and is primarily designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()). A minimal usage sketch of both APIs follows the layer list below.
59 
60   Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers.
61   In particular, the following layers and Caffe importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
62   - Convolution
63   - Deconvolution
64   - Pooling
65   - InnerProduct
66   - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
67   - Softmax
68   - Reshape, Flatten, Slice, Split
69   - LRN
70   - MVN
  - Dropout (since it does nothing on the forward pass)
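
  A minimal sketch of both instantiation paths (the `negative_slope` parameter name follows
  the Caffe convention; treat the exact parameter set as this example's assumption):
  @code
  cv::dnn::LayerParams lp;
  lp.type = "ReLU";
  lp.name = "relu1";
  lp.set("negative_slope", 0.1f);
  // Class API: create the layer through its concrete class.
  cv::Ptr<cv::dnn::ReLULayer> relu = cv::dnn::ReLULayer::create(lp);
  // Factory API: create a layer by its registered type name.
  cv::Ptr<cv::dnn::Layer> layer = cv::dnn::LayerFactory::createLayerInstance("ReLU", lp);
  @endcode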
72 */
73 
74     class CV_EXPORTS BlankLayer : public Layer
75     {
76     public:
77         static Ptr<Layer> create(const LayerParams &params);
78     };
79 
80     /**
     * Constant layer produces the same data blob on every forward pass.
82      */
83     class CV_EXPORTS ConstLayer : public Layer
84     {
85     public:
86         static Ptr<Layer> create(const LayerParams &params);
87     };
88 
89     //! LSTM recurrent layer
90     class CV_EXPORTS LSTMLayer : public Layer
91     {
92     public:
93         /** Creates instance of LSTM layer */
94         static Ptr<LSTMLayer> create(const LayerParams& params);
95 
96         /** @deprecated Use LayerParams::blobs instead.
97         @brief Set trained weights for LSTM layer.
98 
        LSTM behavior on each step is defined by the current input, the previous output, the previous cell state and the learned weights.

        Let @f$x_t@f$ be the current input, @f$h_t@f$ the current output and @f$c_t@f$ the current cell state.
        Then the current output and current cell state are computed as follows:
103         @f{eqnarray*}{
104         h_t &= o_t \odot tanh(c_t),               \\
105         c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
106         @f}
        where @f$\odot@f$ is the per-element multiplication operation and @f$i_t, f_t, o_t, g_t@f$ are internal gates computed from learned weights.
108 
109         Gates are computed as follows:
110         @f{eqnarray*}{
111         i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
112         f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
113         o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
114         g_t &= tanh   &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
115         @f}
116         where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
117         @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
118 
        For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}; W_{xg}] @f$
        (i.e. @f$W_x@f$ is the vertical concatenation of the @f$ W_{x?} @f$ matrices), @f$ W_x \in R^{4N_h \times N_x} @f$.
        The same holds for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}; W_{hg}], W_h \in R^{4N_h \times N_h} @f$
        and for @f$ b = [b_i; b_f; b_o; b_g] @f$, @f$ b \in R^{4N_h} @f$.
123 
        @param Wh is the matrix defining how the previous output is transformed to the internal gates (i.e. @f$ W_h @f$ in the notation above)
        @param Wx is the matrix defining how the current input is transformed to the internal gates (i.e. @f$ W_x @f$ in the notation above)
        @param b  is the bias vector (i.e. @f$ b @f$ in the notation above)
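
        A minimal sketch of packing these weights through LayerParams::blobs (the
        non-deprecated path); the blob order `Wh, Wx, b` is this example's assumption:
        @code
        const int Nx = 128, Nh = 64;      // input and hidden sizes
        cv::Mat Wh(4*Nh, Nh, CV_32F);     // [W_hi; W_hf; W_ho; W_hg]
        cv::Mat Wx(4*Nh, Nx, CV_32F);     // [W_xi; W_xf; W_xo; W_xg]
        cv::Mat b (4*Nh, 1,  CV_32F);     // [b_i; b_f; b_o; b_g]
        // ... fill the matrices with trained values, then:
        cv::dnn::LayerParams lp;
        lp.blobs.push_back(Wh);
        lp.blobs.push_back(Wx);
        lp.blobs.push_back(b);
        cv::Ptr<cv::dnn::LSTMLayer> lstm = cv::dnn::LSTMLayer::create(lp);
        @endcode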
127         */
128         CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
129 
        /** @brief Specifies the shape of the output blob, which will be [[`T`], `N`] + @p outTailShape.
          * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] is used,
          * where `Wh` is the parameter from setWeights().
133           */
134         virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
135 
        /** @deprecated Use flag `use_timestamp_dim` in LayerParams.
          * @brief Specifies whether the first dimension of the input blob is interpreted as the timestamp dimension or as the sample dimension.
          *
          * If the flag is set to true then the shape of the input blob is interpreted as [`T`, `N`, `[data dims]`], where `T` is the number of timestamps and `N` is the number of independent streams.
          * In this case each forward() call will iterate through `T` timestamps and update the layer's state `T` times.
          *
          * If the flag is set to false then the shape of the input blob is interpreted as [`N`, `[data dims]`].
          * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
144           */
145         CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
146 
        /** @deprecated Use flag `produce_cell_output` in LayerParams.
148          * @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
149          * @details Shape of the second output is the same as first output.
150          */
151         CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
152 
        /* In the common case the layer uses a single input with @f$x_t@f$ values to compute the output(s) @f$h_t@f$ (and @f$c_t@f$).
         * @param input should contain packed values @f$x_t@f$
         * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if the setProduceCellOutput() flag was set to true).
         *
         * If setUseTimstampsDim() is set to true then @p input[0] should have at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
         * where `T` is the number of timestamps and `N` is the number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
         *
         * If setUseTimstampsDim() is set to false then @p input[0] should contain a single timestamp, and its shape should have the form [`N`, `[data dims]`] with at least one dimension
         * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
        */
163 
164         int inputNameToIndex(String inputName) CV_OVERRIDE;
165         int outputNameToIndex(const String& outputName) CV_OVERRIDE;
166     };
167 
168     /** @brief Classical recurrent layer
169 
    Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and computes two outputs @f$o_t@f$ and @f$h_t@f$.
171 
172     - input: should contain packed input @f$x_t@f$.
173     - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
174 
    input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` are the number of timestamps and the number of independent samples of @f$x_t@f$ respectively.

    output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is the number of rows in the @f$ W_{ho} @f$ matrix.

    If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is the number of rows in the @f$ W_{hh} @f$ matrix.
180     */
181     class CV_EXPORTS RNNLayer : public Layer
182     {
183     public:
184         /** Creates instance of RNNLayer */
185         static Ptr<RNNLayer> create(const LayerParams& params);
186 
        /** Sets up the learned weights.
188 
        Recurrent-layer behavior on each step is defined by the current input @f$ x_t @f$, the previous state @f$ h_{t-1} @f$ and the learned weights as follows:
190         @f{eqnarray*}{
191         h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h),  \\
192         o_t &= tanh&(W_{ho} h_t + b_o),
193         @f}
194 
        @param Wxh is the @f$ W_{xh} @f$ matrix
        @param bh  is the @f$ b_{h}  @f$ vector
        @param Whh is the @f$ W_{hh} @f$ matrix
        @param Who is the @f$ W_{ho} @f$ matrix
        @param bo  is the @f$ b_{o}  @f$ vector
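
        A minimal sketch of the expected shapes, assuming input size @f$N_x = 32@f$,
        hidden size @f$N_h = 16@f$ and output size @f$N_o = 8@f$:
        @code
        const int Nx = 32, Nh = 16, No = 8;
        cv::Mat Wxh(Nh, Nx, CV_32F);   // input-to-hidden
        cv::Mat bh (Nh, 1,  CV_32F);   // hidden bias
        cv::Mat Whh(Nh, Nh, CV_32F);   // hidden-to-hidden
        cv::Mat Who(No, Nh, CV_32F);   // hidden-to-output
        cv::Mat bo (No, 1,  CV_32F);   // output bias
        // ... fill with trained values, then:
        // rnn->setWeights(Wxh, bh, Whh, Who, bo);
        @endcode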
200         */
201         virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
202 
203         /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
204          * @details Shape of the second output is the same as first output.
205          */
206         virtual void setProduceHiddenOutput(bool produce = false) = 0;
207 
208     };
209 
210     class CV_EXPORTS BaseConvolutionLayer : public Layer
211     {
212     public:
213         Size kernel, stride, pad, dilation, adjustPad;
214         String padMode;
215         int numOutput;
216     };
217 
218     class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
219     {
220     public:
221         static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
222     };
223 
224     class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
225     {
226     public:
227         static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
228     };
229 
230     class CV_EXPORTS LRNLayer : public Layer
231     {
232     public:
233         int type;
234 
235         int size;
236         float alpha, beta, bias;
237         bool normBySize;
238 
239         static Ptr<LRNLayer> create(const LayerParams& params);
240     };
241 
242     class CV_EXPORTS PoolingLayer : public Layer
243     {
244     public:
245         int type;
246         Size kernel, stride;
247         int pad_l, pad_t, pad_r, pad_b;
248         CV_DEPRECATED_EXTERNAL Size pad;
249         bool globalPooling;
250         bool computeMaxIdx;
251         String padMode;
252         bool ceilMode;
        // If true, for average pooling with padding, divide every output region
        // by the whole kernel area. Otherwise exclude zero-padded values and divide
        // by the number of real values.
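        // For example, with a 3x3 kernel at an image corner where only 4 of the
        // 9 covered positions hold real values, the sum is divided by 9 when
        // this flag is true and by 4 otherwise.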
256         bool avePoolPaddedArea;
257         // ROIPooling parameters.
258         Size pooledSize;
259         float spatialScale;
260         // PSROIPooling parameters.
261         int psRoiOutChannels;
262 
263         static Ptr<PoolingLayer> create(const LayerParams& params);
264     };
265 
266     class CV_EXPORTS SoftmaxLayer : public Layer
267     {
268     public:
269         bool logSoftMax;
270 
271         static Ptr<SoftmaxLayer> create(const LayerParams& params);
272     };
273 
274     class CV_EXPORTS InnerProductLayer : public Layer
275     {
276     public:
277         int axis;
278         static Ptr<InnerProductLayer> create(const LayerParams& params);
279     };
280 
281     class CV_EXPORTS MVNLayer : public Layer
282     {
283     public:
284         float eps;
285         bool normVariance, acrossChannels;
286 
287         static Ptr<MVNLayer> create(const LayerParams& params);
288     };
289 
290     /* Reshaping */
291 
292     class CV_EXPORTS ReshapeLayer : public Layer
293     {
294     public:
295         MatShape newShapeDesc;
296         Range newShapeRange;
297 
298         static Ptr<ReshapeLayer> create(const LayerParams& params);
299     };
300 
301     class CV_EXPORTS FlattenLayer : public Layer
302     {
303     public:
304         static Ptr<FlattenLayer> create(const LayerParams &params);
305     };
306 
307     class CV_EXPORTS ConcatLayer : public Layer
308     {
309     public:
310         int axis;
311         /**
         * @brief Adds zero padding when concatenating blobs with different
         * spatial sizes.
314          *
315          * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
316          */
317         bool padding;
318 
319         static Ptr<ConcatLayer> create(const LayerParams &params);
320     };
321 
322     class CV_EXPORTS SplitLayer : public Layer
323     {
324     public:
325         int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
326 
327         static Ptr<SplitLayer> create(const LayerParams &params);
328     };
329 
330     /**
331      * Slice layer has several modes:
332      * 1. Caffe mode
333      * @param[in] axis Axis of split operation
334      * @param[in] slice_point Array of split points
335      *
     * The number of output blobs equals the number of split points plus one. The
     * first blob is a slice of the input from 0 to @p slice_point[0] - 1 along @p axis,
     * the second output blob is a slice of the input from @p slice_point[0] to
     * @p slice_point[1] - 1 along @p axis, and the last output blob is a slice of the
     * input from @p slice_point[-1] up to the end of @p axis.
341      *
342      * 2. TensorFlow mode
343      * @param begin Vector of start indices
344      * @param size Vector of sizes
345      *
     * A more convenient numpy-like slice. The single output blob
     * is the slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`.
348      *
349      * 3. Torch mode
350      * @param axis Axis of split operation
351      *
     * Splits the input blob into equal parts along @p axis.
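     *
     * For example, a Caffe-mode slice that splits a 6-channel blob into a
     * 2-channel and a 4-channel blob along the channel axis (a minimal sketch;
     * the parameter names follow the Caffe convention used by the importer):
     * @code
     * cv::dnn::LayerParams lp;
     * lp.type = "Slice";
     * lp.set("axis", 1);         // slice along the channel axis
     * lp.set("slice_point", 2);  // one split point -> two output blobs
     * cv::Ptr<cv::dnn::SliceLayer> slice = cv::dnn::SliceLayer::create(lp);
     * @endcode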
353      */
354     class CV_EXPORTS SliceLayer : public Layer
355     {
356     public:
357         /**
358          * @brief Vector of slice ranges.
359          *
         * The outer vector's size equals the number of output blobs.
         * Each inner vector holds slice ranges for the leading dimensions of the input.
362          */
363         std::vector<std::vector<Range> > sliceRanges;
364         int axis;
365 
366         static Ptr<SliceLayer> create(const LayerParams &params);
367     };
368 
369     class CV_EXPORTS PermuteLayer : public Layer
370     {
371     public:
372         static Ptr<PermuteLayer> create(const LayerParams& params);
373     };
374 
375     /**
376      * Permute channels of 4-dimensional input blob.
377      * @param group Number of groups to split input channels and pick in turns
378      *              into output blob.
379      *
380      * \f[ groupSize = \frac{number\ of\ channels}{group} \f]
381      * \f[ output(n, c, h, w) = input(n, groupSize \times (c \% group) + \lfloor \frac{c}{group} \rfloor, h, w) \f]
382      * Read more at https://arxiv.org/pdf/1707.01083.pdf
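     *
     * For example, with 6 input channels and `group = 2` (so `groupSize = 3`),
     * output channels 0..5 are taken from input channels 0, 3, 1, 4, 2, 5 respectively.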
383      */
384     class CV_EXPORTS ShuffleChannelLayer : public Layer
385     {
386     public:
387         static Ptr<Layer> create(const LayerParams& params);
388 
389         int group;
390     };
391 
392     /**
393      * @brief Adds extra values for specific axes.
     * @param paddings Vector of paddings in the format
     *                 @code
     *                 [ pad_before, pad_after,  // [0]th dimension
     *                   pad_before, pad_after,  // [1]st dimension
     *                   ...
     *                   pad_before, pad_after ] // [n]th dimension
     *                 @endcode
     *                 giving the number of padded values for every dimension
     *                 starting from the first one. The remaining dimensions
     *                 are not padded.
     * @param value Padding value. Defaults to zero.
     * @param type Padding type: 'constant', 'reflect'
     * @param input_dims Torch's parameter. If @p input_dims is not equal to the
     *                   actual input dimensionality then the `[0]th` dimension
     *                   is considered a batch dimension and @p paddings are shifted
     *                   by one dimension. Defaults to `-1`, which means padding
     *                   dimensions according to @p paddings.
411      */
412     class CV_EXPORTS PaddingLayer : public Layer
413     {
414     public:
415         static Ptr<PaddingLayer> create(const LayerParams& params);
416     };
417 
418     /* Activations */
419     class CV_EXPORTS ActivationLayer : public Layer
420     {
421     public:
422         virtual void forwardSlice(const float* src, float* dst, int len,
423                                   size_t outPlaneSize, int cn0, int cn1) const = 0;
424     };
425 
426     class CV_EXPORTS ReLULayer : public ActivationLayer
427     {
428     public:
429         float negativeSlope;
430 
431         static Ptr<ReLULayer> create(const LayerParams &params);
432     };
433 
434     class CV_EXPORTS ReLU6Layer : public ActivationLayer
435     {
436     public:
437         float minValue, maxValue;
438 
439         static Ptr<ReLU6Layer> create(const LayerParams &params);
440     };
441 
442     class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
443     {
444     public:
445         static Ptr<Layer> create(const LayerParams& params);
446     };
447 
448     class CV_EXPORTS ELULayer : public ActivationLayer
449     {
450     public:
451         static Ptr<ELULayer> create(const LayerParams &params);
452     };
453 
454     class CV_EXPORTS TanHLayer : public ActivationLayer
455     {
456     public:
457         static Ptr<TanHLayer> create(const LayerParams &params);
458     };
459 
460     class CV_EXPORTS SigmoidLayer : public ActivationLayer
461     {
462     public:
463         static Ptr<SigmoidLayer> create(const LayerParams &params);
464     };
465 
466     class CV_EXPORTS BNLLLayer : public ActivationLayer
467     {
468     public:
469         static Ptr<BNLLLayer> create(const LayerParams &params);
470     };
471 
472     class CV_EXPORTS AbsLayer : public ActivationLayer
473     {
474     public:
475         static Ptr<AbsLayer> create(const LayerParams &params);
476     };
477 
478     class CV_EXPORTS PowerLayer : public ActivationLayer
479     {
480     public:
481         float power, scale, shift;
482 
483         static Ptr<PowerLayer> create(const LayerParams &params);
484     };
485 
486     /* Layers used in semantic segmentation */
487 
488     class CV_EXPORTS CropLayer : public Layer
489     {
490     public:
491         int startAxis;
492         std::vector<int> offset;
493 
494         static Ptr<CropLayer> create(const LayerParams &params);
495     };
496 
497     class CV_EXPORTS EltwiseLayer : public Layer
498     {
499     public:
500         static Ptr<EltwiseLayer> create(const LayerParams &params);
501     };
502 
503     class CV_EXPORTS BatchNormLayer : public ActivationLayer
504     {
505     public:
506         bool hasWeights, hasBias;
507         float epsilon;
508 
509         static Ptr<BatchNormLayer> create(const LayerParams &params);
510     };
511 
512     class CV_EXPORTS MaxUnpoolLayer : public Layer
513     {
514     public:
515         Size poolKernel;
516         Size poolPad;
517         Size poolStride;
518 
519         static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
520     };
521 
522     class CV_EXPORTS ScaleLayer : public Layer
523     {
524     public:
525         bool hasBias;
526         int axis;
527 
528         static Ptr<ScaleLayer> create(const LayerParams& params);
529     };
530 
531     class CV_EXPORTS ShiftLayer : public Layer
532     {
533     public:
534         static Ptr<Layer> create(const LayerParams& params);
535     };
536 
537     class CV_EXPORTS PriorBoxLayer : public Layer
538     {
539     public:
540         static Ptr<PriorBoxLayer> create(const LayerParams& params);
541     };
542 
543     class CV_EXPORTS ReorgLayer : public Layer
544     {
545     public:
546         static Ptr<ReorgLayer> create(const LayerParams& params);
547     };
548 
549     class CV_EXPORTS RegionLayer : public Layer
550     {
551     public:
552         static Ptr<RegionLayer> create(const LayerParams& params);
553     };
554 
555     class CV_EXPORTS DetectionOutputLayer : public Layer
556     {
557     public:
558         static Ptr<DetectionOutputLayer> create(const LayerParams& params);
559     };
560 
561     /**
     * @brief \f$ L_p \f$-normalization layer.
     * @param p Normalization factor. The most common values are `p = 1` for \f$ L_1 \f$
     *          normalization and `p = 2` for \f$ L_2 \f$ normalization, but a custom value may be used.
     * @param eps Parameter \f$ \epsilon \f$ that prevents division by zero.
     * @param across_spatial If true, normalize the input across all non-batch dimensions.
     *                       Otherwise normalize every channel separately.
568      *
569      * Across spatial:
570      * @f[
571      * norm = \sqrt[p]{\epsilon + \sum_{x, y, c} |src(x, y, c)|^p } \\
572      * dst(x, y, c) = \frac{ src(x, y, c) }{norm}
573      * @f]
574      *
575      * Channel wise normalization:
576      * @f[
577      * norm(c) = \sqrt[p]{\epsilon + \sum_{x, y} |src(x, y, c)|^p } \\
578      * dst(x, y, c) = \frac{ src(x, y, c) }{norm(c)}
579      * @f]
580      *
     * where `x, y` are spatial coordinates and `c` is the channel.
     *
     * Every sample in the batch is normalized separately. Optionally,
     * the output is scaled by trained parameters.
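     *
     * A minimal sketch of a channel-wise @f$ L_2 @f$ normalization (the registered
     * layer type name "Normalize" is this example's assumption):
     * @code
     * cv::dnn::LayerParams lp;
     * lp.type = "Normalize";
     * lp.set("p", 2);
     * lp.set("eps", 1e-10);
     * lp.set("across_spatial", false);
     * cv::Ptr<cv::dnn::NormalizeBBoxLayer> norm = cv::dnn::NormalizeBBoxLayer::create(lp);
     * @endcode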
585      */
586     class CV_EXPORTS NormalizeBBoxLayer : public Layer
587     {
588     public:
589         float pnorm, epsilon;
590         CV_DEPRECATED_EXTERNAL bool acrossSpatial;
591 
592         static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
593     };
594 
595     /**
     * @brief Resizes the input 4-dimensional blob by nearest neighbor or bilinear strategy.
     *
     * The layer is used to support TensorFlow's resize_nearest_neighbor and resize_bilinear ops.
599      */
600     class CV_EXPORTS ResizeLayer : public Layer
601     {
602     public:
603         static Ptr<ResizeLayer> create(const LayerParams& params);
604     };
605 
606     /**
607      * @brief Bilinear resize layer from https://github.com/cdmh/deeplab-public
608      *
     * It differs from @ref ResizeLayer in how the output shape and the resize scales are computed.
610      */
611     class CV_EXPORTS InterpLayer : public Layer
612     {
613     public:
614         static Ptr<Layer> create(const LayerParams& params);
615     };
616 
617     class CV_EXPORTS ProposalLayer : public Layer
618     {
619     public:
620         static Ptr<ProposalLayer> create(const LayerParams& params);
621     };
622 
623     class CV_EXPORTS CropAndResizeLayer : public Layer
624     {
625     public:
626         static Ptr<Layer> create(const LayerParams& params);
627     };
628 
629 //! @}
630 //! @}
631 CV__DNN_EXPERIMENTAL_NS_END
632 }
633 }
634 #endif
635