1 /*********************************************************************** 2 * Software License Agreement (BSD License) 3 * 4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 6 * 7 * THE BSD LICENSE 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 *************************************************************************/ 30 31 #ifndef OPENCV_FLANN_KDTREE_INDEX_H_ 32 #define OPENCV_FLANN_KDTREE_INDEX_H_ 33 34 #include <algorithm> 35 #include <map> 36 #include <cassert> 37 #include <cstring> 38 39 #include "general.h" 40 #include "nn_index.h" 41 #include "dynamic_bitset.h" 42 #include "matrix.h" 43 #include "result_set.h" 44 #include "heap.h" 45 #include "allocator.h" 46 #include "random.h" 47 #include "saving.h" 48 49 50 namespace cvflann 51 { 52 53 struct KDTreeIndexParams : public IndexParams 54 { 55 KDTreeIndexParams(int trees = 4) 56 { 57 (*this)["algorithm"] = FLANN_INDEX_KDTREE; 58 (*this)["trees"] = trees; 59 } 60 }; 61 62 63 /** 64 * Randomized kd-tree index 65 * 66 * Contains the k-d trees and other information for indexing a set of points 67 * for nearest-neighbor matching. 68 */ 69 template <typename Distance> 70 class KDTreeIndex : public NNIndex<Distance> 71 { 72 public: 73 typedef typename Distance::ElementType ElementType; 74 typedef typename Distance::ResultType DistanceType; 75 76 77 /** 78 * KDTree constructor 79 * 80 * Params: 81 * inputData = dataset with the input features 82 * params = parameters passed to the kdtree algorithm 83 */ 84 KDTreeIndex(const Matrix<ElementType>& inputData, const IndexParams& params = KDTreeIndexParams(), 85 Distance d = Distance() ) : dataset_(inputData)86 dataset_(inputData), index_params_(params), distance_(d) 87 { 88 size_ = dataset_.rows; 89 veclen_ = dataset_.cols; 90 91 trees_ = get_param(index_params_,"trees",4); 92 tree_roots_ = new NodePtr[trees_]; 93 94 // Create a permutable array of indices to the input vectors. 95 vind_.resize(size_); 96 for (size_t i = 0; i < size_; ++i) { 97 vind_[i] = int(i); 98 } 99 100 mean_ = new DistanceType[veclen_]; 101 var_ = new DistanceType[veclen_]; 102 } 103 104 105 KDTreeIndex(const KDTreeIndex&); 106 KDTreeIndex& operator=(const KDTreeIndex&); 107 108 /** 109 * Standard destructor 110 */ ~KDTreeIndex()111 ~KDTreeIndex() 112 { 113 if (tree_roots_!=NULL) { 114 delete[] tree_roots_; 115 } 116 delete[] mean_; 117 delete[] var_; 118 } 119 120 /** 121 * Builds the index 122 */ buildIndex()123 void buildIndex() CV_OVERRIDE 124 { 125 /* Construct the randomized trees. */ 126 for (int i = 0; i < trees_; i++) { 127 /* Randomize the order of vectors to allow for unbiased sampling. */ 128 #ifndef OPENCV_FLANN_USE_STD_RAND 129 cv::randShuffle(vind_); 130 #else 131 std::random_shuffle(vind_.begin(), vind_.end()); 132 #endif 133 134 tree_roots_[i] = divideTree(&vind_[0], int(size_) ); 135 } 136 } 137 138 getType()139 flann_algorithm_t getType() const CV_OVERRIDE 140 { 141 return FLANN_INDEX_KDTREE; 142 } 143 144 saveIndex(FILE * stream)145 void saveIndex(FILE* stream) CV_OVERRIDE 146 { 147 save_value(stream, trees_); 148 for (int i=0; i<trees_; ++i) { 149 save_tree(stream, tree_roots_[i]); 150 } 151 } 152 153 154 loadIndex(FILE * stream)155 void loadIndex(FILE* stream) CV_OVERRIDE 156 { 157 load_value(stream, trees_); 158 if (tree_roots_!=NULL) { 159 delete[] tree_roots_; 160 } 161 tree_roots_ = new NodePtr[trees_]; 162 for (int i=0; i<trees_; ++i) { 163 load_tree(stream,tree_roots_[i]); 164 } 165 166 index_params_["algorithm"] = getType(); 167 index_params_["trees"] = tree_roots_; 168 } 169 170 /** 171 * Returns size of index. 172 */ size()173 size_t size() const CV_OVERRIDE 174 { 175 return size_; 176 } 177 178 /** 179 * Returns the length of an index feature. 180 */ veclen()181 size_t veclen() const CV_OVERRIDE 182 { 183 return veclen_; 184 } 185 186 /** 187 * Computes the inde memory usage 188 * Returns: memory used by the index 189 */ usedMemory()190 int usedMemory() const CV_OVERRIDE 191 { 192 return int(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory 193 } 194 195 /** 196 * Find set of nearest neighbors to vec. Their indices are stored inside 197 * the result object. 198 * 199 * Params: 200 * result = the result object in which the indices of the nearest-neighbors are stored 201 * vec = the vector for which to search the nearest neighbors 202 * maxCheck = the maximum number of restarts (in a best-bin-first manner) 203 */ findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)204 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE 205 { 206 int maxChecks = get_param(searchParams,"checks", 32); 207 float epsError = 1+get_param(searchParams,"eps",0.0f); 208 209 if (maxChecks==FLANN_CHECKS_UNLIMITED) { 210 getExactNeighbors(result, vec, epsError); 211 } 212 else { 213 getNeighbors(result, vec, maxChecks, epsError); 214 } 215 } 216 getParameters()217 IndexParams getParameters() const CV_OVERRIDE 218 { 219 return index_params_; 220 } 221 222 private: 223 224 225 /*--------------------- Internal Data Structures --------------------------*/ 226 struct Node 227 { 228 /** 229 * Dimension used for subdivision. 230 */ 231 int divfeat; 232 /** 233 * The values used for subdivision. 234 */ 235 DistanceType divval; 236 /** 237 * The child nodes. 238 */ 239 Node* child1, * child2; 240 }; 241 typedef Node* NodePtr; 242 typedef BranchStruct<NodePtr, DistanceType> BranchSt; 243 typedef BranchSt* Branch; 244 245 246 save_tree(FILE * stream,NodePtr tree)247 void save_tree(FILE* stream, NodePtr tree) 248 { 249 save_value(stream, *tree); 250 if (tree->child1!=NULL) { 251 save_tree(stream, tree->child1); 252 } 253 if (tree->child2!=NULL) { 254 save_tree(stream, tree->child2); 255 } 256 } 257 258 load_tree(FILE * stream,NodePtr & tree)259 void load_tree(FILE* stream, NodePtr& tree) 260 { 261 tree = pool_.allocate<Node>(); 262 load_value(stream, *tree); 263 if (tree->child1!=NULL) { 264 load_tree(stream, tree->child1); 265 } 266 if (tree->child2!=NULL) { 267 load_tree(stream, tree->child2); 268 } 269 } 270 271 272 /** 273 * Create a tree node that subdivides the list of vecs from vind[first] 274 * to vind[last]. The routine is called recursively on each sublist. 275 * Place a pointer to this new tree node in the location pTree. 276 * 277 * Params: pTree = the new node to create 278 * first = index of the first vector 279 * last = index of the last vector 280 */ divideTree(int * ind,int count)281 NodePtr divideTree(int* ind, int count) 282 { 283 NodePtr node = pool_.allocate<Node>(); // allocate memory 284 285 /* If too few exemplars remain, then make this a leaf node. */ 286 if ( count == 1) { 287 node->child1 = node->child2 = NULL; /* Mark as leaf node. */ 288 node->divfeat = *ind; /* Store index of this vec. */ 289 } 290 else { 291 int idx; 292 int cutfeat; 293 DistanceType cutval; 294 meanSplit(ind, count, idx, cutfeat, cutval); 295 296 node->divfeat = cutfeat; 297 node->divval = cutval; 298 node->child1 = divideTree(ind, idx); 299 node->child2 = divideTree(ind+idx, count-idx); 300 } 301 302 return node; 303 } 304 305 306 /** 307 * Choose which feature to use in order to subdivide this set of vectors. 308 * Make a random choice among those with the highest variance, and use 309 * its variance as the threshold value. 310 */ meanSplit(int * ind,int count,int & index,int & cutfeat,DistanceType & cutval)311 void meanSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval) 312 { 313 memset(mean_,0,veclen_*sizeof(DistanceType)); 314 memset(var_,0,veclen_*sizeof(DistanceType)); 315 316 /* Compute mean values. Only the first SAMPLE_MEAN values need to be 317 sampled to get a good estimate. 318 */ 319 int cnt = std::min((int)SAMPLE_MEAN+1, count); 320 for (int j = 0; j < cnt; ++j) { 321 ElementType* v = dataset_[ind[j]]; 322 for (size_t k=0; k<veclen_; ++k) { 323 mean_[k] += v[k]; 324 } 325 } 326 for (size_t k=0; k<veclen_; ++k) { 327 mean_[k] /= cnt; 328 } 329 330 /* Compute variances (no need to divide by count). */ 331 for (int j = 0; j < cnt; ++j) { 332 ElementType* v = dataset_[ind[j]]; 333 for (size_t k=0; k<veclen_; ++k) { 334 DistanceType dist = v[k] - mean_[k]; 335 var_[k] += dist * dist; 336 } 337 } 338 /* Select one of the highest variance indices at random. */ 339 cutfeat = selectDivision(var_); 340 cutval = mean_[cutfeat]; 341 342 int lim1, lim2; 343 planeSplit(ind, count, cutfeat, cutval, lim1, lim2); 344 345 if (lim1>count/2) index = lim1; 346 else if (lim2<count/2) index = lim2; 347 else index = count/2; 348 349 /* If either list is empty, it means that all remaining features 350 * are identical. Split in the middle to maintain a balanced tree. 351 */ 352 if ((lim1==count)||(lim2==0)) index = count/2; 353 } 354 355 356 /** 357 * Select the top RAND_DIM largest values from v and return the index of 358 * one of these selected at random. 359 */ selectDivision(DistanceType * v)360 int selectDivision(DistanceType* v) 361 { 362 int num = 0; 363 size_t topind[RAND_DIM]; 364 365 /* Create a list of the indices of the top RAND_DIM values. */ 366 for (size_t i = 0; i < veclen_; ++i) { 367 if ((num < RAND_DIM)||(v[i] > v[topind[num-1]])) { 368 /* Put this element at end of topind. */ 369 if (num < RAND_DIM) { 370 topind[num++] = i; /* Add to list. */ 371 } 372 else { 373 topind[num-1] = i; /* Replace last element. */ 374 } 375 /* Bubble end value down to right location by repeated swapping. */ 376 int j = num - 1; 377 while (j > 0 && v[topind[j]] > v[topind[j-1]]) { 378 std::swap(topind[j], topind[j-1]); 379 --j; 380 } 381 } 382 } 383 /* Select a random integer in range [0,num-1], and return that index. */ 384 int rnd = rand_int(num); 385 return (int)topind[rnd]; 386 } 387 388 389 /** 390 * Subdivide the list of points by a plane perpendicular on axe corresponding 391 * to the 'cutfeat' dimension at 'cutval' position. 392 * 393 * On return: 394 * dataset[ind[0..lim1-1]][cutfeat]<cutval 395 * dataset[ind[lim1..lim2-1]][cutfeat]==cutval 396 * dataset[ind[lim2..count]][cutfeat]>cutval 397 */ planeSplit(int * ind,int count,int cutfeat,DistanceType cutval,int & lim1,int & lim2)398 void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2) 399 { 400 /* Move vector indices for left subtree to front of list. */ 401 int left = 0; 402 int right = count-1; 403 for (;; ) { 404 while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left; 405 while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right; 406 if (left>right) break; 407 std::swap(ind[left], ind[right]); ++left; --right; 408 } 409 lim1 = left; 410 right = count-1; 411 for (;; ) { 412 while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left; 413 while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right; 414 if (left>right) break; 415 std::swap(ind[left], ind[right]); ++left; --right; 416 } 417 lim2 = left; 418 } 419 420 /** 421 * Performs an exact nearest neighbor search. The exact search performs a full 422 * traversal of the tree. 423 */ getExactNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,float epsError)424 void getExactNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, float epsError) 425 { 426 // checkID -= 1; /* Set a different unique ID for each search. */ 427 428 if (trees_ > 1) { 429 fprintf(stderr,"It doesn't make any sense to use more than one tree for exact search"); 430 } 431 if (trees_>0) { 432 searchLevelExact(result, vec, tree_roots_[0], 0.0, epsError); 433 } 434 assert(result.full()); 435 } 436 437 /** 438 * Performs the approximate nearest-neighbor search. The search is approximate 439 * because the tree traversal is abandoned after a given number of descends in 440 * the tree. 441 */ getNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,int maxCheck,float epsError)442 void getNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, int maxCheck, float epsError) 443 { 444 int i; 445 BranchSt branch; 446 447 int checkCount = 0; 448 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_); 449 DynamicBitset checked(size_); 450 451 /* Search once through each tree down to root. */ 452 for (i = 0; i < trees_; ++i) { 453 searchLevel(result, vec, tree_roots_[i], 0, checkCount, maxCheck, epsError, heap, checked); 454 } 455 456 /* Keep searching other branches from heap until finished. */ 457 while ( heap->popMin(branch) && (checkCount < maxCheck || !result.full() )) { 458 searchLevel(result, vec, branch.node, branch.mindist, checkCount, maxCheck, epsError, heap, checked); 459 } 460 461 delete heap; 462 463 assert(result.full()); 464 } 465 466 467 /** 468 * Search starting from a given node of the tree. Based on any mismatches at 469 * higher levels, all exemplars below this level must have a distance of 470 * at least "mindistsq". 471 */ searchLevel(ResultSet<DistanceType> & result_set,const ElementType * vec,NodePtr node,DistanceType mindist,int & checkCount,int maxCheck,float epsError,Heap<BranchSt> * heap,DynamicBitset & checked)472 void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, NodePtr node, DistanceType mindist, int& checkCount, int maxCheck, 473 float epsError, Heap<BranchSt>* heap, DynamicBitset& checked) 474 { 475 if (result_set.worstDist()<mindist) { 476 // printf("Ignoring branch, too far\n"); 477 return; 478 } 479 480 /* If this is a leaf node, then do check and return. */ 481 if ((node->child1 == NULL)&&(node->child2 == NULL)) { 482 /* Do not check same node more than once when searching multiple trees. 483 Once a vector is checked, we set its location in vind to the 484 current checkID. 485 */ 486 int index = node->divfeat; 487 if ( checked.test(index) || ((checkCount>=maxCheck)&& result_set.full()) ) return; 488 checked.set(index); 489 checkCount++; 490 491 DistanceType dist = distance_(dataset_[index], vec, veclen_); 492 result_set.addPoint(dist,index); 493 494 return; 495 } 496 497 /* Which child branch should be taken first? */ 498 ElementType val = vec[node->divfeat]; 499 DistanceType diff = val - node->divval; 500 NodePtr bestChild = (diff < 0) ? node->child1 : node->child2; 501 NodePtr otherChild = (diff < 0) ? node->child2 : node->child1; 502 503 /* Create a branch record for the branch not taken. Add distance 504 of this feature boundary (we don't attempt to correct for any 505 use of this feature in a parent node, which is unlikely to 506 happen and would have only a small effect). Don't bother 507 adding more branches to heap after halfway point, as cost of 508 adding exceeds their value. 509 */ 510 511 DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat); 512 // if (2 * checkCount < maxCheck || !result.full()) { 513 if ((new_distsq*epsError < result_set.worstDist())|| !result_set.full()) { 514 heap->insert( BranchSt(otherChild, new_distsq) ); 515 } 516 517 /* Call recursively to search next level down. */ 518 searchLevel(result_set, vec, bestChild, mindist, checkCount, maxCheck, epsError, heap, checked); 519 } 520 521 /** 522 * Performs an exact search in the tree starting from a node. 523 */ searchLevelExact(ResultSet<DistanceType> & result_set,const ElementType * vec,const NodePtr node,DistanceType mindist,const float epsError)524 void searchLevelExact(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindist, const float epsError) 525 { 526 /* If this is a leaf node, then do check and return. */ 527 if ((node->child1 == NULL)&&(node->child2 == NULL)) { 528 int index = node->divfeat; 529 DistanceType dist = distance_(dataset_[index], vec, veclen_); 530 result_set.addPoint(dist,index); 531 return; 532 } 533 534 /* Which child branch should be taken first? */ 535 ElementType val = vec[node->divfeat]; 536 DistanceType diff = val - node->divval; 537 NodePtr bestChild = (diff < 0) ? node->child1 : node->child2; 538 NodePtr otherChild = (diff < 0) ? node->child2 : node->child1; 539 540 /* Create a branch record for the branch not taken. Add distance 541 of this feature boundary (we don't attempt to correct for any 542 use of this feature in a parent node, which is unlikely to 543 happen and would have only a small effect). Don't bother 544 adding more branches to heap after halfway point, as cost of 545 adding exceeds their value. 546 */ 547 548 DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat); 549 550 /* Call recursively to search next level down. */ 551 searchLevelExact(result_set, vec, bestChild, mindist, epsError); 552 553 if (new_distsq*epsError<=result_set.worstDist()) { 554 searchLevelExact(result_set, vec, otherChild, new_distsq, epsError); 555 } 556 } 557 558 559 private: 560 561 enum 562 { 563 /** 564 * To improve efficiency, only SAMPLE_MEAN random values are used to 565 * compute the mean and variance at each level when building a tree. 566 * A value of 100 seems to perform as well as using all values. 567 */ 568 SAMPLE_MEAN = 100, 569 /** 570 * Top random dimensions to consider 571 * 572 * When creating random trees, the dimension on which to subdivide is 573 * selected at random from among the top RAND_DIM dimensions with the 574 * highest variance. A value of 5 works well. 575 */ 576 RAND_DIM=5 577 }; 578 579 580 /** 581 * Number of randomized trees that are used 582 */ 583 int trees_; 584 585 /** 586 * Array of indices to vectors in the dataset. 587 */ 588 std::vector<int> vind_; 589 590 /** 591 * The dataset used by this index 592 */ 593 const Matrix<ElementType> dataset_; 594 595 IndexParams index_params_; 596 597 size_t size_; 598 size_t veclen_; 599 600 601 DistanceType* mean_; 602 DistanceType* var_; 603 604 605 /** 606 * Array of k-d trees used to find neighbours. 607 */ 608 NodePtr* tree_roots_; 609 610 /** 611 * Pooled memory allocator. 612 * 613 * Using a pooled memory allocator is more efficient 614 * than allocating memory directly when there is a large 615 * number small of memory allocations. 616 */ 617 PooledAllocator pool_; 618 619 Distance distance_; 620 621 622 }; // class KDTreeForest 623 624 } 625 626 #endif //OPENCV_FLANN_KDTREE_INDEX_H_ 627