1 #pragma once
2 
3 #include <cstddef>
4 #include <functional>
5 #include <iterator>
6 #include <vector>
7 
8 namespace mbgl {
9 
10 /*
11     Computes the longest common subsequence (LCS) of sequences A and B, represented
12     by pairs of random access iterators. The result is output to the provided output
13     iterator. Equality of elements is determined by the provided comparator, defaulting
14     to ==.
15 
16     The algorithm used is the O(ND) time and space algorithm from:
17 
18         Myers, Eugene W. An O(ND) Difference Algorithm and Its Variations. Algorithmica
19         (1986) 1: 251. http://xmailserver.org/diff2.pdf
20 
21     For understanding this algorithm, http://simplygenius.net/Article/DiffTutorial1 is
22     also helpful.
23 
24     TODO: implement the O(N) space refinement presented in the paper.
25 */
26 template <class InIt, class OutIt, class Equal>
longest_common_subsequence(InIt a,InIt endA,InIt b,InIt endB,OutIt outIt,Equal eq)27 OutIt longest_common_subsequence(InIt a, InIt endA,
28                                  InIt b, InIt endB,
29                                  OutIt outIt,
30                                  Equal eq) {
31     const std::ptrdiff_t N = endA - a;
32     const std::ptrdiff_t M = endB - b;
33     const std::ptrdiff_t D = N + M;
34 
35     if (D == 0) {
36         return outIt;
37     }
38 
39     std::vector<std::vector<std::ptrdiff_t>> vs;
40 
41     // Self-executing lambda to allow `return` to break from inner loop, and avoid shadowing `v`.
42     [&] () {
43         std::vector<std::ptrdiff_t> v;
44         v.resize(2 * D + 1);
45         v[1] = 0;
46 
47         // Core of the algorithm: greedily find farthest-reaching D-paths for increasing
48         // values of D. Store the farthest-reaching endpoints found in each iteration for
49         // later reconstructing the LCS.
50         for (std::ptrdiff_t d = 0; d <= D; ++d) {
51             for (std::ptrdiff_t k = -d; k <= d; k += 2) {
52                 std::ptrdiff_t x = (k == -d || (k != d && v.at(k - 1 + D) < v.at(k + 1 + D)))
53                     ? v.at(k + 1 + D)       // moving down
54                     : v.at(k - 1 + D) + 1;  // moving right
55 
56                 std::ptrdiff_t y = x - k;
57 
58                 while (x < N && y < M && eq(a[x], b[y])) {
59                     x++;
60                     y++;
61                 }
62 
63                 v[k + D] = x;
64 
65                 if (x >= N && y >= M) {
66                     vs.push_back(v);
67                     return;
68                 }
69             }
70 
71             vs.push_back(v);
72         }
73     }();
74 
75     std::ptrdiff_t x = N;
76     std::ptrdiff_t y = M;
77 
78     using E = typename std::iterator_traits<InIt>::value_type;
79     std::vector<E> lcsReverse;
80 
81     // Reconstruct the LCS using the farthest-reaching endpoints stored above.
82     for (std::ptrdiff_t d = vs.size() - 1; x > 0 || y > 0; --d) {
83         const std::vector<std::ptrdiff_t>& v = vs.at(d);
84         const std::ptrdiff_t k = x - y;
85         const bool down = (k == -d || (k != d && v.at(k - 1 + D) < v.at(k + 1 + D)));
86         const std::ptrdiff_t kPrev = down ? k + 1 : k - 1;
87 
88         x = v.at(kPrev + D);
89         y = x - kPrev;
90 
91         for (std::ptrdiff_t c = v[k + D]; c != (down ? x : x + 1); --c) {
92             lcsReverse.push_back(a[c - 1]);
93         }
94     }
95 
96     return std::copy(lcsReverse.rbegin(), lcsReverse.rend(), outIt);
97 }
98 
99 template < typename InIt, typename OutIt >
longest_common_subsequence(InIt a,InIt endA,InIt b,InIt endB,OutIt outIt)100 OutIt longest_common_subsequence(InIt a, InIt endA,
101                                  InIt b, InIt endB,
102                                  OutIt outIt) {
103     return longest_common_subsequence(a, endA, b, endB, outIt, std::equal_to<>());
104 }
105 
106 } // namespace mbgl
107