levenshtein.hpp 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. /**********
  2. This file is part of levenshpp.
  3. levenshpp is free library: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 3 of the License, or
  6. (at your option) any later version.
  7. levenshpp is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with levenshpp. If not, see <http://www.gnu.org/licenses/>.
  13. **********/
  14. #pragma once
#include <algorithm>
#include <set>
#include <utility>
#include <vector>
  18. template <typename T>
  19. class LevenshteinPotencial
  20. {
  21. public:
  22. LevenshteinPotencial(const T &px, const T &py, const T value): coords(std::pair<T, T>(px, py)), minValue(value)
  23. { }
  24. unsigned int getTaxiCab() const
  25. {
  26. return coords.first +coords.second;
  27. }
  28. bool operator<(const LevenshteinPotencial &o) const
  29. {
  30. if (minValue == o.minValue)
  31. return getTaxiCab() > o.getTaxiCab(); // The bigger the best
  32. return minValue < o.minValue;
  33. }
  34. bool operator==(const std::pair<T, T> &o) const
  35. {
  36. return coords.first == o.first && coords.second == o.second;
  37. }
  38. std::pair<T, T> coords;
  39. T minValue;
  40. };
  41. static int levenshtein_get(int **map, int px, int py)
  42. {
  43. if (px == -1 && py == -1)
  44. return 0;
  45. if (px == -1)
  46. return py +1;
  47. if (py == -1)
  48. return px +1;
  49. return map[px][py];
  50. }
  51. template <class T, typename SIZE=unsigned int>
  52. unsigned int levenshtein(const T &a, const T &b, const SIZE aSize, const SIZE bSize)
  53. {
  54. int **items = new int*[aSize]();
  55. std::multiset<LevenshteinPotencial<SIZE> > toProcess;
  56. for (SIZE i =0; i < aSize; i++)
  57. {
  58. items[i] = new int[bSize]();
  59. toProcess.insert(LevenshteinPotencial<SIZE>(0, i, i));
  60. for (SIZE j=0; j < bSize; j++)
  61. items[i][j] = -1;
  62. }
  63. for (SIZE i =1; i < bSize; i++)
  64. toProcess.insert(LevenshteinPotencial<SIZE>(i, 0, i));
  65. while (toProcess.size())
  66. {
  67. const auto currentIt = toProcess.cbegin();
  68. const LevenshteinPotencial<SIZE> &current = *(currentIt);
  69. int add = levenshtein_get(items, current.coords.first -1, current.coords.second);
  70. int rem = levenshtein_get(items, current.coords.first, current.coords.second -1);
  71. int mod = levenshtein_get(items, current.coords.first -1, current.coords.second -1);
  72. int min = -1;
  73. // Compute weight
  74. if (add != -1)
  75. min = add +1;
  76. if (rem != -1)
  77. min = min == -1 ? rem +1 : std::min(min, rem +1);
  78. if (mod != -1)
  79. {
  80. if (a[current.coords.first] != b[current.coords.second])
  81. mod++;
  82. min = min == -1 ? mod : std::min(min, mod);
  83. }
  84. items[current.coords.first][current.coords.second] = min;
  85. if (current.coords.first == aSize -1 && current.coords.second == bSize -1)
  86. break;
  87. //update toProcess
  88. add = rem = mod = -1;
  89. for (auto i = toProcess.cbegin(); i != toProcess.cend(); i++)
  90. {
  91. if (*i == std::pair<SIZE, SIZE>(current.coords.first, current.coords.second +1))
  92. {
  93. add = (*i).minValue;
  94. toProcess.erase(i);
  95. }
  96. else if (*i == std::pair<SIZE, SIZE>(current.coords.first +1, current.coords.second))
  97. {
  98. rem = (*i).minValue;
  99. toProcess.erase(i);
  100. }
  101. else if (*i == std::pair<SIZE, SIZE>(current.coords.first +1, current.coords.second +1))
  102. {
  103. mod = (*i).minValue;
  104. toProcess.erase(i);
  105. }
  106. }
  107. if (current.coords.second +1 < bSize && items[current.coords.first][current.coords.second +1] == -1)
  108. toProcess.insert(LevenshteinPotencial<SIZE>(current.coords.first, current.coords.second +1, add == -1 ? min +1 : std::min(min +1, add)));
  109. if (current.coords.first +1 < aSize && items[current.coords.first +1][current.coords.second] == -1)
  110. toProcess.insert(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second, rem == -1 ? min +1 : std::min(min +1, rem)));
  111. if (current.coords.first +1 < aSize && current.coords.second +1 < bSize &&
  112. items[current.coords.first +1][current.coords.second +1] == -1)
  113. toProcess.insert(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second +1, mod == -1 ? min : std::min(mod, min)));
  114. toProcess.erase(currentIt);
  115. }
  116. const unsigned int levenshtein = items[aSize -1][bSize -1];
  117. for (SIZE i =0; i < aSize; i++)
  118. delete[] items[i];
  119. delete[] items;
  120. return levenshtein;
  121. }