1 /*********************************************************************** 2 * Software License Agreement (BSD License) 3 * 4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 6 * 7 * THE BSD LICENSE 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 *************************************************************************/ 30 31 #ifndef OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_ 32 #define OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_ 33 34 #include <algorithm> 35 #include <map> 36 #include <cassert> 37 #include <cstring> 38 39 #include "general.h" 40 #include "nn_index.h" 41 #include "matrix.h" 42 #include "result_set.h" 43 #include "heap.h" 44 #include "allocator.h" 45 #include "random.h" 46 #include "saving.h" 47 48 namespace cvflann 49 { 50 51 struct KDTreeSingleIndexParams : public IndexParams 52 { 53 KDTreeSingleIndexParams(int leaf_max_size = 10, bool reorder = true, int dim = -1) 54 { 55 (*this)["algorithm"] = FLANN_INDEX_KDTREE_SINGLE; 56 (*this)["leaf_max_size"] = leaf_max_size; 57 (*this)["reorder"] = reorder; 58 (*this)["dim"] = dim; 59 } 60 }; 61 62 63 /** 64 * Randomized kd-tree index 65 * 66 * Contains the k-d trees and other information for indexing a set of points 67 * for nearest-neighbor matching. 68 */ 69 template <typename Distance> 70 class KDTreeSingleIndex : public NNIndex<Distance> 71 { 72 public: 73 typedef typename Distance::ElementType ElementType; 74 typedef typename Distance::ResultType DistanceType; 75 76 77 /** 78 * KDTree constructor 79 * 80 * Params: 81 * inputData = dataset with the input features 82 * params = parameters passed to the kdtree algorithm 83 */ 84 KDTreeSingleIndex(const Matrix<ElementType>& inputData, const IndexParams& params = KDTreeSingleIndexParams(), 85 Distance d = Distance() ) : dataset_(inputData)86 dataset_(inputData), index_params_(params), distance_(d) 87 { 88 size_ = dataset_.rows; 89 dim_ = dataset_.cols; 90 root_node_ = 0; 91 int dim_param = get_param(params,"dim",-1); 92 if (dim_param>0) dim_ = dim_param; 93 leaf_max_size_ = get_param(params,"leaf_max_size",10); 94 reorder_ = get_param(params,"reorder",true); 95 96 // Create a permutable array of indices to the input vectors. 97 vind_.resize(size_); 98 for (size_t i = 0; i < size_; i++) { 99 vind_[i] = (int)i; 100 } 101 } 102 103 KDTreeSingleIndex(const KDTreeSingleIndex&); 104 KDTreeSingleIndex& operator=(const KDTreeSingleIndex&); 105 106 /** 107 * Standard destructor 108 */ ~KDTreeSingleIndex()109 ~KDTreeSingleIndex() 110 { 111 if (reorder_) delete[] data_.data; 112 } 113 114 /** 115 * Builds the index 116 */ buildIndex()117 void buildIndex() CV_OVERRIDE 118 { 119 computeBoundingBox(root_bbox_); 120 root_node_ = divideTree(0, (int)size_, root_bbox_ ); // construct the tree 121 122 if (reorder_) { 123 delete[] data_.data; 124 data_ = cvflann::Matrix<ElementType>(new ElementType[size_*dim_], size_, dim_); 125 for (size_t i=0; i<size_; ++i) { 126 for (size_t j=0; j<dim_; ++j) { 127 data_[i][j] = dataset_[vind_[i]][j]; 128 } 129 } 130 } 131 else { 132 data_ = dataset_; 133 } 134 } 135 getType()136 flann_algorithm_t getType() const CV_OVERRIDE 137 { 138 return FLANN_INDEX_KDTREE_SINGLE; 139 } 140 141 saveIndex(FILE * stream)142 void saveIndex(FILE* stream) CV_OVERRIDE 143 { 144 save_value(stream, size_); 145 save_value(stream, dim_); 146 save_value(stream, root_bbox_); 147 save_value(stream, reorder_); 148 save_value(stream, leaf_max_size_); 149 save_value(stream, vind_); 150 if (reorder_) { 151 save_value(stream, data_); 152 } 153 save_tree(stream, root_node_); 154 } 155 156 loadIndex(FILE * stream)157 void loadIndex(FILE* stream) CV_OVERRIDE 158 { 159 load_value(stream, size_); 160 load_value(stream, dim_); 161 load_value(stream, root_bbox_); 162 load_value(stream, reorder_); 163 load_value(stream, leaf_max_size_); 164 load_value(stream, vind_); 165 if (reorder_) { 166 load_value(stream, data_); 167 } 168 else { 169 data_ = dataset_; 170 } 171 load_tree(stream, root_node_); 172 173 174 index_params_["algorithm"] = getType(); 175 index_params_["leaf_max_size"] = leaf_max_size_; 176 index_params_["reorder"] = reorder_; 177 } 178 179 /** 180 * Returns size of index. 181 */ size()182 size_t size() const CV_OVERRIDE 183 { 184 return size_; 185 } 186 187 /** 188 * Returns the length of an index feature. 189 */ veclen()190 size_t veclen() const CV_OVERRIDE 191 { 192 return dim_; 193 } 194 195 /** 196 * Computes the inde memory usage 197 * Returns: memory used by the index 198 */ usedMemory()199 int usedMemory() const CV_OVERRIDE 200 { 201 return (int)(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory 202 } 203 204 205 /** 206 * \brief Perform k-nearest neighbor search 207 * \param[in] queries The query points for which to find the nearest neighbors 208 * \param[out] indices The indices of the nearest neighbors found 209 * \param[out] dists Distances to the nearest neighbors found 210 * \param[in] knn Number of nearest neighbors to return 211 * \param[in] params Search parameters 212 */ knnSearch(const Matrix<ElementType> & queries,Matrix<int> & indices,Matrix<DistanceType> & dists,int knn,const SearchParams & params)213 void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params) CV_OVERRIDE 214 { 215 assert(queries.cols == veclen()); 216 assert(indices.rows >= queries.rows); 217 assert(dists.rows >= queries.rows); 218 assert(int(indices.cols) >= knn); 219 assert(int(dists.cols) >= knn); 220 221 KNNSimpleResultSet<DistanceType> resultSet(knn); 222 for (size_t i = 0; i < queries.rows; i++) { 223 resultSet.init(indices[i], dists[i]); 224 findNeighbors(resultSet, queries[i], params); 225 } 226 } 227 getParameters()228 IndexParams getParameters() const CV_OVERRIDE 229 { 230 return index_params_; 231 } 232 233 /** 234 * Find set of nearest neighbors to vec. Their indices are stored inside 235 * the result object. 236 * 237 * Params: 238 * result = the result object in which the indices of the nearest-neighbors are stored 239 * vec = the vector for which to search the nearest neighbors 240 * maxCheck = the maximum number of restarts (in a best-bin-first manner) 241 */ findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)242 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE 243 { 244 float epsError = 1+get_param(searchParams,"eps",0.0f); 245 246 std::vector<DistanceType> dists(dim_,0); 247 DistanceType distsq = computeInitialDistances(vec, dists); 248 searchLevel(result, vec, root_node_, distsq, dists, epsError); 249 } 250 251 private: 252 253 254 /*--------------------- Internal Data Structures --------------------------*/ 255 struct Node 256 { 257 /** 258 * Indices of points in leaf node 259 */ 260 int left, right; 261 /** 262 * Dimension used for subdivision. 263 */ 264 int divfeat; 265 /** 266 * The values used for subdivision. 267 */ 268 DistanceType divlow, divhigh; 269 /** 270 * The child nodes. 271 */ 272 Node* child1, * child2; 273 }; 274 typedef Node* NodePtr; 275 276 277 struct Interval 278 { 279 DistanceType low, high; 280 }; 281 282 typedef std::vector<Interval> BoundingBox; 283 284 typedef BranchStruct<NodePtr, DistanceType> BranchSt; 285 typedef BranchSt* Branch; 286 287 288 289 save_tree(FILE * stream,NodePtr tree)290 void save_tree(FILE* stream, NodePtr tree) 291 { 292 save_value(stream, *tree); 293 if (tree->child1!=NULL) { 294 save_tree(stream, tree->child1); 295 } 296 if (tree->child2!=NULL) { 297 save_tree(stream, tree->child2); 298 } 299 } 300 301 load_tree(FILE * stream,NodePtr & tree)302 void load_tree(FILE* stream, NodePtr& tree) 303 { 304 tree = pool_.allocate<Node>(); 305 load_value(stream, *tree); 306 if (tree->child1!=NULL) { 307 load_tree(stream, tree->child1); 308 } 309 if (tree->child2!=NULL) { 310 load_tree(stream, tree->child2); 311 } 312 } 313 314 computeBoundingBox(BoundingBox & bbox)315 void computeBoundingBox(BoundingBox& bbox) 316 { 317 bbox.resize(dim_); 318 for (size_t i=0; i<dim_; ++i) { 319 bbox[i].low = (DistanceType)dataset_[0][i]; 320 bbox[i].high = (DistanceType)dataset_[0][i]; 321 } 322 for (size_t k=1; k<dataset_.rows; ++k) { 323 for (size_t i=0; i<dim_; ++i) { 324 if (dataset_[k][i]<bbox[i].low) bbox[i].low = (DistanceType)dataset_[k][i]; 325 if (dataset_[k][i]>bbox[i].high) bbox[i].high = (DistanceType)dataset_[k][i]; 326 } 327 } 328 } 329 330 331 /** 332 * Create a tree node that subdivides the list of vecs from vind[first] 333 * to vind[last]. The routine is called recursively on each sublist. 334 * Place a pointer to this new tree node in the location pTree. 335 * 336 * Params: pTree = the new node to create 337 * first = index of the first vector 338 * last = index of the last vector 339 */ divideTree(int left,int right,BoundingBox & bbox)340 NodePtr divideTree(int left, int right, BoundingBox& bbox) 341 { 342 NodePtr node = pool_.allocate<Node>(); // allocate memory 343 344 /* If too few exemplars remain, then make this a leaf node. */ 345 if ( (right-left) <= leaf_max_size_) { 346 node->child1 = node->child2 = NULL; /* Mark as leaf node. */ 347 node->left = left; 348 node->right = right; 349 350 // compute bounding-box of leaf points 351 for (size_t i=0; i<dim_; ++i) { 352 bbox[i].low = (DistanceType)dataset_[vind_[left]][i]; 353 bbox[i].high = (DistanceType)dataset_[vind_[left]][i]; 354 } 355 for (int k=left+1; k<right; ++k) { 356 for (size_t i=0; i<dim_; ++i) { 357 if (bbox[i].low>dataset_[vind_[k]][i]) bbox[i].low=(DistanceType)dataset_[vind_[k]][i]; 358 if (bbox[i].high<dataset_[vind_[k]][i]) bbox[i].high=(DistanceType)dataset_[vind_[k]][i]; 359 } 360 } 361 } 362 else { 363 int idx; 364 int cutfeat; 365 DistanceType cutval; 366 middleSplit_(&vind_[0]+left, right-left, idx, cutfeat, cutval, bbox); 367 368 node->divfeat = cutfeat; 369 370 BoundingBox left_bbox(bbox); 371 left_bbox[cutfeat].high = cutval; 372 node->child1 = divideTree(left, left+idx, left_bbox); 373 374 BoundingBox right_bbox(bbox); 375 right_bbox[cutfeat].low = cutval; 376 node->child2 = divideTree(left+idx, right, right_bbox); 377 378 node->divlow = left_bbox[cutfeat].high; 379 node->divhigh = right_bbox[cutfeat].low; 380 381 for (size_t i=0; i<dim_; ++i) { 382 bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); 383 bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); 384 } 385 } 386 387 return node; 388 } 389 computeMinMax(int * ind,int count,int dim,ElementType & min_elem,ElementType & max_elem)390 void computeMinMax(int* ind, int count, int dim, ElementType& min_elem, ElementType& max_elem) 391 { 392 min_elem = dataset_[ind[0]][dim]; 393 max_elem = dataset_[ind[0]][dim]; 394 for (int i=1; i<count; ++i) { 395 ElementType val = dataset_[ind[i]][dim]; 396 if (val<min_elem) min_elem = val; 397 if (val>max_elem) max_elem = val; 398 } 399 } 400 middleSplit(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval,const BoundingBox & bbox)401 void middleSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox) 402 { 403 // find the largest span from the approximate bounding box 404 ElementType max_span = bbox[0].high-bbox[0].low; 405 cutfeat = 0; 406 cutval = (bbox[0].high+bbox[0].low)/2; 407 for (size_t i=1; i<dim_; ++i) { 408 ElementType span = bbox[i].high-bbox[i].low; 409 if (span>max_span) { 410 max_span = span; 411 cutfeat = i; 412 cutval = (bbox[i].high+bbox[i].low)/2; 413 } 414 } 415 416 // compute exact span on the found dimension 417 ElementType min_elem, max_elem; 418 computeMinMax(ind, count, cutfeat, min_elem, max_elem); 419 cutval = (min_elem+max_elem)/2; 420 max_span = max_elem - min_elem; 421 422 // check if a dimension of a largest span exists 423 size_t k = cutfeat; 424 for (size_t i=0; i<dim_; ++i) { 425 if (i==k) continue; 426 ElementType span = bbox[i].high-bbox[i].low; 427 if (span>max_span) { 428 computeMinMax(ind, count, i, min_elem, max_elem); 429 span = max_elem - min_elem; 430 if (span>max_span) { 431 max_span = span; 432 cutfeat = i; 433 cutval = (min_elem+max_elem)/2; 434 } 435 } 436 } 437 int lim1, lim2; 438 planeSplit(ind, count, cutfeat, cutval, lim1, lim2); 439 440 if (lim1>count/2) index = lim1; 441 else if (lim2<count/2) index = lim2; 442 else index = count/2; 443 } 444 445 middleSplit_(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval,const BoundingBox & bbox)446 void middleSplit_(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox) 447 { 448 const float EPS=0.00001f; 449 DistanceType max_span = bbox[0].high-bbox[0].low; 450 for (size_t i=1; i<dim_; ++i) { 451 DistanceType span = bbox[i].high-bbox[i].low; 452 if (span>max_span) { 453 max_span = span; 454 } 455 } 456 DistanceType max_spread = -1; 457 cutfeat = 0; 458 for (size_t i=0; i<dim_; ++i) { 459 DistanceType span = bbox[i].high-bbox[i].low; 460 if (span>(DistanceType)((1-EPS)*max_span)) { 461 ElementType min_elem, max_elem; 462 computeMinMax(ind, count, cutfeat, min_elem, max_elem); 463 DistanceType spread = (DistanceType)(max_elem-min_elem); 464 if (spread>max_spread) { 465 cutfeat = (int)i; 466 max_spread = spread; 467 } 468 } 469 } 470 // split in the middle 471 DistanceType split_val = (bbox[cutfeat].low+bbox[cutfeat].high)/2; 472 ElementType min_elem, max_elem; 473 computeMinMax(ind, count, cutfeat, min_elem, max_elem); 474 475 if (split_val<min_elem) cutval = (DistanceType)min_elem; 476 else if (split_val>max_elem) cutval = (DistanceType)max_elem; 477 else cutval = split_val; 478 479 int lim1, lim2; 480 planeSplit(ind, count, cutfeat, cutval, lim1, lim2); 481 482 if (lim1>count/2) index = lim1; 483 else if (lim2<count/2) index = lim2; 484 else index = count/2; 485 } 486 487 488 /** 489 * Subdivide the list of points by a plane perpendicular on axe corresponding 490 * to the 'cutfeat' dimension at 'cutval' position. 491 * 492 * On return: 493 * dataset[ind[0..lim1-1]][cutfeat]<cutval 494 * dataset[ind[lim1..lim2-1]][cutfeat]==cutval 495 * dataset[ind[lim2..count]][cutfeat]>cutval 496 */ planeSplit(int * ind,int count,int cutfeat,DistanceType cutval,int & lim1,int & lim2)497 void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2) 498 { 499 /* Move vector indices for left subtree to front of list. */ 500 int left = 0; 501 int right = count-1; 502 for (;; ) { 503 while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left; 504 while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right; 505 if (left>right) break; 506 std::swap(ind[left], ind[right]); ++left; --right; 507 } 508 /* If either list is empty, it means that all remaining features 509 * are identical. Split in the middle to maintain a balanced tree. 510 */ 511 lim1 = left; 512 right = count-1; 513 for (;; ) { 514 while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left; 515 while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right; 516 if (left>right) break; 517 std::swap(ind[left], ind[right]); ++left; --right; 518 } 519 lim2 = left; 520 } 521 computeInitialDistances(const ElementType * vec,std::vector<DistanceType> & dists)522 DistanceType computeInitialDistances(const ElementType* vec, std::vector<DistanceType>& dists) 523 { 524 DistanceType distsq = 0.0; 525 526 for (size_t i = 0; i < dim_; ++i) { 527 if (vec[i] < root_bbox_[i].low) { 528 dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].low, (int)i); 529 distsq += dists[i]; 530 } 531 if (vec[i] > root_bbox_[i].high) { 532 dists[i] = distance_.accum_dist(vec[i], root_bbox_[i].high, (int)i); 533 distsq += dists[i]; 534 } 535 } 536 537 return distsq; 538 } 539 540 /** 541 * Performs an exact search in the tree starting from a node. 542 */ searchLevel(ResultSet<DistanceType> & result_set,const ElementType * vec,const NodePtr node,DistanceType mindistsq,std::vector<DistanceType> & dists,const float epsError)543 void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq, 544 std::vector<DistanceType>& dists, const float epsError) 545 { 546 /* If this is a leaf node, then do check and return. */ 547 if ((node->child1 == NULL)&&(node->child2 == NULL)) { 548 DistanceType worst_dist = result_set.worstDist(); 549 for (int i=node->left; i<node->right; ++i) { 550 int index = reorder_ ? i : vind_[i]; 551 DistanceType dist = distance_(vec, data_[index], dim_, worst_dist); 552 if (dist<worst_dist) { 553 result_set.addPoint(dist,vind_[i]); 554 } 555 } 556 return; 557 } 558 559 /* Which child branch should be taken first? */ 560 int idx = node->divfeat; 561 ElementType val = vec[idx]; 562 DistanceType diff1 = val - node->divlow; 563 DistanceType diff2 = val - node->divhigh; 564 565 NodePtr bestChild; 566 NodePtr otherChild; 567 DistanceType cut_dist; 568 if ((diff1+diff2)<0) { 569 bestChild = node->child1; 570 otherChild = node->child2; 571 cut_dist = distance_.accum_dist(val, node->divhigh, idx); 572 } 573 else { 574 bestChild = node->child2; 575 otherChild = node->child1; 576 cut_dist = distance_.accum_dist( val, node->divlow, idx); 577 } 578 579 /* Call recursively to search next level down. */ 580 searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError); 581 582 DistanceType dst = dists[idx]; 583 mindistsq = mindistsq + cut_dist - dst; 584 dists[idx] = cut_dist; 585 if (mindistsq*epsError<=result_set.worstDist()) { 586 searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError); 587 } 588 dists[idx] = dst; 589 } 590 591 private: 592 593 /** 594 * The dataset used by this index 595 */ 596 const Matrix<ElementType> dataset_; 597 598 IndexParams index_params_; 599 600 int leaf_max_size_; 601 bool reorder_; 602 603 604 /** 605 * Array of indices to vectors in the dataset. 606 */ 607 std::vector<int> vind_; 608 609 Matrix<ElementType> data_; 610 611 size_t size_; 612 size_t dim_; 613 614 /** 615 * Array of k-d trees used to find neighbours. 616 */ 617 NodePtr root_node_; 618 619 BoundingBox root_bbox_; 620 621 /** 622 * Pooled memory allocator. 623 * 624 * Using a pooled memory allocator is more efficient 625 * than allocating memory directly when there is a large 626 * number small of memory allocations. 627 */ 628 PooledAllocator pool_; 629 630 Distance distance_; 631 }; // class KDTree 632 633 } 634 635 #endif //OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_ 636