1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 #ifndef OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
32 #define OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
33 
34 #include <algorithm>
35 #include <map>
36 #include <cassert>
37 #include <cstring>
38 
39 #include "general.h"
40 #include "nn_index.h"
41 #include "matrix.h"
42 #include "result_set.h"
43 #include "heap.h"
44 #include "allocator.h"
45 #include "random.h"
46 #include "saving.h"
47 
48 namespace cvflann
49 {
50 
51 struct KDTreeSingleIndexParams : public IndexParams
52 {
53     KDTreeSingleIndexParams(int leaf_max_size = 10, bool reorder = true, int dim = -1)
54     {
55         (*this)["algorithm"] = FLANN_INDEX_KDTREE_SINGLE;
56         (*this)["leaf_max_size"] = leaf_max_size;
57         (*this)["reorder"] = reorder;
58         (*this)["dim"] = dim;
59     }
60 };
61 
62 
/**
 * Single kd-tree index
 *
 * Contains one k-d tree and the other information needed for indexing a set
 * of points for nearest-neighbor matching.
 */
69 template <typename Distance>
70 class KDTreeSingleIndex : public NNIndex<Distance>
71 {
72 public:
73     typedef typename Distance::ElementType ElementType;
74     typedef typename Distance::ResultType DistanceType;
75 
76 
77     /**
78      * KDTree constructor
79      *
80      * Params:
81      *          inputData = dataset with the input features
82      *          params = parameters passed to the kdtree algorithm
83      */
84     KDTreeSingleIndex(const Matrix<ElementType>& inputData, const IndexParams& params = KDTreeSingleIndexParams(),
85                       Distance d = Distance() ) :
dataset_(inputData)86         dataset_(inputData), index_params_(params), distance_(d)
87     {
88         size_ = dataset_.rows;
89         dim_ = dataset_.cols;
90         root_node_ = 0;
91         int dim_param = get_param(params,"dim",-1);
92         if (dim_param>0) dim_ = dim_param;
93         leaf_max_size_ = get_param(params,"leaf_max_size",10);
94         reorder_ = get_param(params,"reorder",true);
95 
96         // Create a permutable array of indices to the input vectors.
97         vind_.resize(size_);
98         for (size_t i = 0; i < size_; i++) {
99             vind_[i] = (int)i;
100         }
101     }
102 
    // Copy construction and copy assignment are only declared here; no
    // definition is visible in this file.
    KDTreeSingleIndex(const KDTreeSingleIndex&);
    KDTreeSingleIndex& operator=(const KDTreeSingleIndex&);
105 
106     /**
107      * Standard destructor
108      */
~KDTreeSingleIndex()109     ~KDTreeSingleIndex()
110     {
111         if (reorder_) delete[] data_.data;
112     }
113 
114     /**
115      * Builds the index
116      */
buildIndex()117     void buildIndex() CV_OVERRIDE
118     {
119         computeBoundingBox(root_bbox_);
120         root_node_ = divideTree(0, (int)size_, root_bbox_ );   // construct the tree
121 
122         if (reorder_) {
123             delete[] data_.data;
124             data_ = cvflann::Matrix<ElementType>(new ElementType[size_*dim_], size_, dim_);
125             for (size_t i=0; i<size_; ++i) {
126                 for (size_t j=0; j<dim_; ++j) {
127                     data_[i][j] = dataset_[vind_[i]][j];
128                 }
129             }
130         }
131         else {
132             data_ = dataset_;
133         }
134     }
135 
getType()136     flann_algorithm_t getType() const CV_OVERRIDE
137     {
138         return FLANN_INDEX_KDTREE_SINGLE;
139     }
140 
141 
saveIndex(FILE * stream)142     void saveIndex(FILE* stream) CV_OVERRIDE
143     {
144         save_value(stream, size_);
145         save_value(stream, dim_);
146         save_value(stream, root_bbox_);
147         save_value(stream, reorder_);
148         save_value(stream, leaf_max_size_);
149         save_value(stream, vind_);
150         if (reorder_) {
151             save_value(stream, data_);
152         }
153         save_tree(stream, root_node_);
154     }
155 
156 
loadIndex(FILE * stream)157     void loadIndex(FILE* stream) CV_OVERRIDE
158     {
159         load_value(stream, size_);
160         load_value(stream, dim_);
161         load_value(stream, root_bbox_);
162         load_value(stream, reorder_);
163         load_value(stream, leaf_max_size_);
164         load_value(stream, vind_);
165         if (reorder_) {
166             load_value(stream, data_);
167         }
168         else {
169             data_ = dataset_;
170         }
171         load_tree(stream, root_node_);
172 
173 
174         index_params_["algorithm"] = getType();
175         index_params_["leaf_max_size"] = leaf_max_size_;
176         index_params_["reorder"] = reorder_;
177     }
178 
179     /**
180      *  Returns size of index.
181      */
size()182     size_t size() const CV_OVERRIDE
183     {
184         return size_;
185     }
186 
187     /**
188      * Returns the length of an index feature.
189      */
veclen()190     size_t veclen() const CV_OVERRIDE
191     {
192         return dim_;
193     }
194 
195     /**
196      * Computes the inde memory usage
197      * Returns: memory used by the index
198      */
usedMemory()199     int usedMemory() const CV_OVERRIDE
200     {
201         return (int)(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int));  // pool memory and vind array memory
202     }
203 
204 
205     /**
206      * \brief Perform k-nearest neighbor search
207      * \param[in] queries The query points for which to find the nearest neighbors
208      * \param[out] indices The indices of the nearest neighbors found
209      * \param[out] dists Distances to the nearest neighbors found
210      * \param[in] knn Number of nearest neighbors to return
211      * \param[in] params Search parameters
212      */
knnSearch(const Matrix<ElementType> & queries,Matrix<int> & indices,Matrix<DistanceType> & dists,int knn,const SearchParams & params)213     void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params) CV_OVERRIDE
214     {
215         assert(queries.cols == veclen());
216         assert(indices.rows >= queries.rows);
217         assert(dists.rows >= queries.rows);
218         assert(int(indices.cols) >= knn);
219         assert(int(dists.cols) >= knn);
220 
221         KNNSimpleResultSet<DistanceType> resultSet(knn);
222         for (size_t i = 0; i < queries.rows; i++) {
223             resultSet.init(indices[i], dists[i]);
224             findNeighbors(resultSet, queries[i], params);
225         }
226     }
227 
getParameters()228     IndexParams getParameters() const CV_OVERRIDE
229     {
230         return index_params_;
231     }
232 
233     /**
234      * Find set of nearest neighbors to vec. Their indices are stored inside
235      * the result object.
236      *
237      * Params:
238      *     result = the result object in which the indices of the nearest-neighbors are stored
239      *     vec = the vector for which to search the nearest neighbors
240      *     maxCheck = the maximum number of restarts (in a best-bin-first manner)
241      */
findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)242     void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
243     {
244         float epsError = 1+get_param(searchParams,"eps",0.0f);
245 
246         std::vector<DistanceType> dists(dim_,0);
247         DistanceType distsq = computeInitialDistances(vec, dists);
248         searchLevel(result, vec, root_node_, distsq, dists, epsError);
249     }
250 
251 private:
252 
253 
254     /*--------------------- Internal Data Structures --------------------------*/
255     struct Node
256     {
257         /**
258          * Indices of points in leaf node
259          */
260         int left, right;
261         /**
262          * Dimension used for subdivision.
263          */
264         int divfeat;
265         /**
266          * The values used for subdivision.
267          */
268         DistanceType divlow, divhigh;
269         /**
270          * The child nodes.
271          */
272         Node* child1, * child2;
273     };
274     typedef Node* NodePtr;
275 
276 
277     struct Interval
278     {
279         DistanceType low, high;
280     };
281 
282     typedef std::vector<Interval> BoundingBox;
283 
284     typedef BranchStruct<NodePtr, DistanceType> BranchSt;
285     typedef BranchSt* Branch;
286 
287 
288 
289 
save_tree(FILE * stream,NodePtr tree)290     void save_tree(FILE* stream, NodePtr tree)
291     {
292         save_value(stream, *tree);
293         if (tree->child1!=NULL) {
294             save_tree(stream, tree->child1);
295         }
296         if (tree->child2!=NULL) {
297             save_tree(stream, tree->child2);
298         }
299     }
300 
301 
load_tree(FILE * stream,NodePtr & tree)302     void load_tree(FILE* stream, NodePtr& tree)
303     {
304         tree = pool_.allocate<Node>();
305         load_value(stream, *tree);
306         if (tree->child1!=NULL) {
307             load_tree(stream, tree->child1);
308         }
309         if (tree->child2!=NULL) {
310             load_tree(stream, tree->child2);
311         }
312     }
313 
314 
computeBoundingBox(BoundingBox & bbox)315     void computeBoundingBox(BoundingBox& bbox)
316     {
317         bbox.resize(dim_);
318         for (size_t i=0; i<dim_; ++i) {
319             bbox[i].low = (DistanceType)dataset_[0][i];
320             bbox[i].high = (DistanceType)dataset_[0][i];
321         }
322         for (size_t k=1; k<dataset_.rows; ++k) {
323             for (size_t i=0; i<dim_; ++i) {
324                 if (dataset_[k][i]<bbox[i].low) bbox[i].low = (DistanceType)dataset_[k][i];
325                 if (dataset_[k][i]>bbox[i].high) bbox[i].high = (DistanceType)dataset_[k][i];
326             }
327         }
328     }
329 
330 
331     /**
332      * Create a tree node that subdivides the list of vecs from vind[first]
333      * to vind[last].  The routine is called recursively on each sublist.
334      * Place a pointer to this new tree node in the location pTree.
335      *
336      * Params: pTree = the new node to create
337      *                  first = index of the first vector
338      *                  last = index of the last vector
339      */
divideTree(int left,int right,BoundingBox & bbox)340     NodePtr divideTree(int left, int right, BoundingBox& bbox)
341     {
342         NodePtr node = pool_.allocate<Node>(); // allocate memory
343 
344         /* If too few exemplars remain, then make this a leaf node. */
345         if ( (right-left) <= leaf_max_size_) {
346             node->child1 = node->child2 = NULL;    /* Mark as leaf node. */
347             node->left = left;
348             node->right = right;
349 
350             // compute bounding-box of leaf points
351             for (size_t i=0; i<dim_; ++i) {
352                 bbox[i].low = (DistanceType)dataset_[vind_[left]][i];
353                 bbox[i].high = (DistanceType)dataset_[vind_[left]][i];
354             }
355             for (int k=left+1; k<right; ++k) {
356                 for (size_t i=0; i<dim_; ++i) {
357                     if (bbox[i].low>dataset_[vind_[k]][i]) bbox[i].low=(DistanceType)dataset_[vind_[k]][i];
358                     if (bbox[i].high<dataset_[vind_[k]][i]) bbox[i].high=(DistanceType)dataset_[vind_[k]][i];
359                 }
360             }
361         }
362         else {
363             int idx;
364             int cutfeat;
365             DistanceType cutval;
366             middleSplit_(&vind_[0]+left, right-left, idx, cutfeat, cutval, bbox);
367 
368             node->divfeat = cutfeat;
369 
370             BoundingBox left_bbox(bbox);
371             left_bbox[cutfeat].high = cutval;
372             node->child1 = divideTree(left, left+idx, left_bbox);
373 
374             BoundingBox right_bbox(bbox);
375             right_bbox[cutfeat].low = cutval;
376             node->child2 = divideTree(left+idx, right, right_bbox);
377 
378             node->divlow = left_bbox[cutfeat].high;
379             node->divhigh = right_bbox[cutfeat].low;
380 
381             for (size_t i=0; i<dim_; ++i) {
382                 bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low);
383                 bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high);
384             }
385         }
386 
387         return node;
388     }
389 
computeMinMax(int * ind,int count,int dim,ElementType & min_elem,ElementType & max_elem)390     void computeMinMax(int* ind, int count, int dim, ElementType& min_elem, ElementType& max_elem)
391     {
392         min_elem = dataset_[ind[0]][dim];
393         max_elem = dataset_[ind[0]][dim];
394         for (int i=1; i<count; ++i) {
395             ElementType val = dataset_[ind[i]][dim];
396             if (val<min_elem) min_elem = val;
397             if (val>max_elem) max_elem = val;
398         }
399     }
400 
middleSplit(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval,const BoundingBox & bbox)401     void middleSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox)
402     {
403         // find the largest span from the approximate bounding box
404         ElementType max_span = bbox[0].high-bbox[0].low;
405         cutfeat = 0;
406         cutval = (bbox[0].high+bbox[0].low)/2;
407         for (size_t i=1; i<dim_; ++i) {
408             ElementType span = bbox[i].high-bbox[i].low;
409             if (span>max_span) {
410                 max_span = span;
411                 cutfeat = i;
412                 cutval = (bbox[i].high+bbox[i].low)/2;
413             }
414         }
415 
416         // compute exact span on the found dimension
417         ElementType min_elem, max_elem;
418         computeMinMax(ind, count, cutfeat, min_elem, max_elem);
419         cutval = (min_elem+max_elem)/2;
420         max_span = max_elem - min_elem;
421 
422         // check if a dimension of a largest span exists
423         size_t k = cutfeat;
424         for (size_t i=0; i<dim_; ++i) {
425             if (i==k) continue;
426             ElementType span = bbox[i].high-bbox[i].low;
427             if (span>max_span) {
428                 computeMinMax(ind, count, i, min_elem, max_elem);
429                 span = max_elem - min_elem;
430                 if (span>max_span) {
431                     max_span = span;
432                     cutfeat = i;
433                     cutval = (min_elem+max_elem)/2;
434                 }
435             }
436         }
437         int lim1, lim2;
438         planeSplit(ind, count, cutfeat, cutval, lim1, lim2);
439 
440         if (lim1>count/2) index = lim1;
441         else if (lim2<count/2) index = lim2;
442         else index = count/2;
443     }
444 
445 
middleSplit_(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval,const BoundingBox & bbox)446     void middleSplit_(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox)
447     {
448         const float EPS=0.00001f;
449         DistanceType max_span = bbox[0].high-bbox[0].low;
450         for (size_t i=1; i<dim_; ++i) {
451             DistanceType span = bbox[i].high-bbox[i].low;
452             if (span>max_span) {
453                 max_span = span;
454             }
455         }
456         DistanceType max_spread = -1;
457         cutfeat = 0;
458         for (size_t i=0; i<dim_; ++i) {
459             DistanceType span = bbox[i].high-bbox[i].low;
460             if (span>(DistanceType)((1-EPS)*max_span)) {
461                 ElementType min_elem, max_elem;
462                 computeMinMax(ind, count, cutfeat, min_elem, max_elem);
463                 DistanceType spread = (DistanceType)(max_elem-min_elem);
464                 if (spread>max_spread) {
465                     cutfeat = (int)i;
466                     max_spread = spread;
467                 }
468             }
469         }
470         // split in the middle
471         DistanceType split_val = (bbox[cutfeat].low+bbox[cutfeat].high)/2;
472         ElementType min_elem, max_elem;
473         computeMinMax(ind, count, cutfeat, min_elem, max_elem);
474 
475         if (split_val<min_elem) cutval = (DistanceType)min_elem;
476         else if (split_val>max_elem) cutval = (DistanceType)max_elem;
477         else cutval = split_val;
478 
479         int lim1, lim2;
480         planeSplit(ind, count, cutfeat, cutval, lim1, lim2);
481 
482         if (lim1>count/2) index = lim1;
483         else if (lim2<count/2) index = lim2;
484         else index = count/2;
485     }
486 
487 
488     /**
489      *  Subdivide the list of points by a plane perpendicular on axe corresponding
490      *  to the 'cutfeat' dimension at 'cutval' position.
491      *
492      *  On return:
493      *  dataset[ind[0..lim1-1]][cutfeat]<cutval
494      *  dataset[ind[lim1..lim2-1]][cutfeat]==cutval
495      *  dataset[ind[lim2..count]][cutfeat]>cutval
496      */
planeSplit(int * ind,int count,int cutfeat,DistanceType cutval,int & lim1,int & lim2)497     void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2)
498     {
499         /* Move vector indices for left subtree to front of list. */
500         int left = 0;
501         int right = count-1;
502         for (;; ) {
503             while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left;
504             while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right;
505             if (left>right) break;
506             std::swap(ind[left], ind[right]); ++left; --right;
507         }
508         /* If either list is empty, it means that all remaining features
509          * are identical. Split in the middle to maintain a balanced tree.
510          */
511         lim1 = left;
512         right = count-1;
513         for (;; ) {
514             while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left;
515             while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right;
516             if (left>right) break;
517             std::swap(ind[left], ind[right]); ++left; --right;
518         }
519         lim2 = left;
520     }
521 
computeInitialDistances(const ElementType * vec,std::vector<DistanceType> & dists)522     DistanceType computeInitialDistances(const ElementType* vec, std::vector<DistanceType>& dists)
523     {
524         DistanceType distsq = 0.0;
525 
526         for (size_t i = 0; i < dim_; ++i) {
527             if (vec[i] < root_bbox_[i].low) {
528                 dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].low, (int)i);
529                 distsq += dists[i];
530             }
531             if (vec[i] > root_bbox_[i].high) {
532                 dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].high, (int)i);
533                 distsq += dists[i];
534             }
535         }
536 
537         return distsq;
538     }
539 
540     /**
541      * Performs an exact search in the tree starting from a node.
542      */
searchLevel(ResultSet<DistanceType> & result_set,const ElementType * vec,const NodePtr node,DistanceType mindistsq,std::vector<DistanceType> & dists,const float epsError)543     void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq,
544                      std::vector<DistanceType>& dists, const float epsError)
545     {
546         /* If this is a leaf node, then do check and return. */
547         if ((node->child1 == NULL)&&(node->child2 == NULL)) {
548             DistanceType worst_dist = result_set.worstDist();
549             for (int i=node->left; i<node->right; ++i) {
550                 int index = reorder_ ? i : vind_[i];
551                 DistanceType dist = distance_(vec, data_[index], dim_, worst_dist);
552                 if (dist<worst_dist) {
553                     result_set.addPoint(dist,vind_[i]);
554                 }
555             }
556             return;
557         }
558 
559         /* Which child branch should be taken first? */
560         int idx = node->divfeat;
561         ElementType val = vec[idx];
562         DistanceType diff1 = val - node->divlow;
563         DistanceType diff2 = val - node->divhigh;
564 
565         NodePtr bestChild;
566         NodePtr otherChild;
567         DistanceType cut_dist;
568         if ((diff1+diff2)<0) {
569             bestChild = node->child1;
570             otherChild = node->child2;
571             cut_dist = distance_.accum_dist(val, node->divhigh, idx);
572         }
573         else {
574             bestChild = node->child2;
575             otherChild = node->child1;
576             cut_dist = distance_.accum_dist( val, node->divlow, idx);
577         }
578 
579         /* Call recursively to search next level down. */
580         searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError);
581 
582         DistanceType dst = dists[idx];
583         mindistsq = mindistsq + cut_dist - dst;
584         dists[idx] = cut_dist;
585         if (mindistsq*epsError<=result_set.worstDist()) {
586             searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError);
587         }
588         dists[idx] = dst;
589     }
590 
591 private:
592 
593     /**
594      * The dataset used by this index
595      */
596     const Matrix<ElementType> dataset_;
597 
598     IndexParams index_params_;
599 
600     int leaf_max_size_;
601     bool reorder_;
602 
603 
604     /**
605      *  Array of indices to vectors in the dataset.
606      */
607     std::vector<int> vind_;
608 
609     Matrix<ElementType> data_;
610 
611     size_t size_;
612     size_t dim_;
613 
    /**
     * Root node of the k-d tree used to find neighbours.
     */
617     NodePtr root_node_;
618 
619     BoundingBox root_bbox_;
620 
    /**
     * Pooled memory allocator.
     *
     * Using a pooled memory allocator is more efficient
     * than allocating memory directly when there is a large
     * number of small memory allocations.
     */
628     PooledAllocator pool_;
629 
630     Distance distance_;
631 };   // class KDTree
632 
633 }
634 
635 #endif //OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
636