1
1

levenshteinMatrice.hpp 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. #pragma once
  2. #include <iostream>
  3. #include <map>
  4. #include "jsonContainer.hh"
  5. #include "levenshtein.hpp"
  6. enum eLevenshteinOperator: char
  7. {
  8. add = '+',
  9. rem = '-',
  10. mod = '!',
  11. equ = '='
  12. };
  13. class LevenshteinMatrice_base
  14. {
  15. public:
  16. virtual ~LevenshteinMatrice_base() {}
  17. virtual const std::map<const JSonElement*, eLevenshteinOperator> path() const;
  18. virtual size_t result() const =0;
  19. virtual bool areSimilar() const =0;
  20. eLevenshteinOperator get(const JSonElement *) const;
  21. public:
  22. class Builder
  23. {
  24. public:
  25. Builder();
  26. ~Builder();
  27. LevenshteinMatrice_base *build(const JSonElement *a, const JSonElement *b) const;
  28. };
  29. protected:
  30. std::map<const JSonElement*, eLevenshteinOperator> operations;
  31. };
  32. class LevenshteinMatrice_manual: public LevenshteinMatrice_base
  33. {
  34. public:
  35. LevenshteinMatrice_manual *add(const JSonElement*, eLevenshteinOperator);
  36. size_t result() const;
  37. bool areSimilar() const;
  38. public:
  39. size_t _result;
  40. };
  41. class LevenshteinMatriceWithScore: public LevenshteinMatrice_base
  42. {
  43. public:
  44. LevenshteinMatriceWithScore(float score);
  45. size_t result() const;
  46. bool areSimilar() const;
  47. private:
  48. bool _result;
  49. };
  50. class LevenshteinMatrice: public LevenshteinMatrice_base
  51. {
  52. public:
  53. template<typename T>
  54. static LevenshteinMatrice *build(const JSonContainer::const_iterator aBegin, const JSonContainer::const_iterator bBegin,
  55. const size_t n, const size_t m)
  56. {
  57. LevenshteinMatrice *result = new LevenshteinMatrice();
  58. size_t i, j;
  59. JSonContainer::const_iterator a = aBegin;
  60. JSonContainer::const_iterator b;
  61. LevenshteinMatrice_base::Builder matriceBuilder;
  62. T **matrice = new T*[n +1]();
  63. LevenshteinMatrice_base ***subMatrice = new LevenshteinMatrice_base**[n]();
  64. matrice[0] = new T[m +1];
  65. for (i =0; i <= m; ++i)
  66. matrice[0][i] = i;
  67. for (i=1; i <= n; ++i)
  68. {
  69. matrice[i] = new T[m +1];
  70. matrice[i][0] = i;
  71. subMatrice[i -1] = new LevenshteinMatrice_base*[m];
  72. for (size_t j=0; j < m; ++j)
  73. subMatrice[i -1][j] = nullptr;
  74. }
  75. for (i =1; i <= n; ++i, ++a)
  76. {
  77. b = bBegin;
  78. for (j =1; j <= m; ++j, ++b)
  79. {
  80. LevenshteinMatrice_base *_subMatrice = matriceBuilder.build(*a, *b);
  81. if (_subMatrice != nullptr)
  82. {
  83. const T chCost = matrice[i -1][j -1] + (_subMatrice->areSimilar() ? 0 : 1);
  84. if (chCost <= matrice[i -1][j] +1 &&
  85. chCost <= matrice[i][j -1] +1)
  86. {
  87. matrice[i][j] = chCost;
  88. subMatrice[i -1][j -1] = _subMatrice;
  89. continue;
  90. }
  91. delete _subMatrice;
  92. } // Change is not worth or subMatrice is null (eg. a and b has different types)
  93. matrice[i][j] = std::min(matrice[i -1][j], matrice[i][j -1]) +1;
  94. }
  95. }
  96. result->levenDist = matrice[n][m];
  97. result->levenRelativeDist = 1 -(matrice[n][m] / std::max(n, m));
  98. result->shortestPath<T>(matrice, subMatrice, n, m, --a, --b);
  99. cleanMatrice(matrice, subMatrice, n, m);
  100. return result;
  101. };
  102. template<typename T>
  103. static void cleanMatrice(T **matrice, LevenshteinMatrice_base ***subMatrice, const size_t &n, const size_t &m)
  104. {
  105. for (size_t i=0; i <= n; ++i)
  106. {
  107. delete []matrice[i];
  108. if (i != n)
  109. {
  110. for (size_t j=0; j < m; ++j)
  111. if (subMatrice[i][j])
  112. delete subMatrice[i][j];
  113. delete []subMatrice[i];
  114. }
  115. }
  116. delete []matrice;
  117. delete []subMatrice;
  118. };
  119. size_t result() const;
  120. bool areSimilar() const;
  121. private:
  122. template<typename T>
  123. void shortestPath(T **matrice,
  124. LevenshteinMatrice_base ***subMatrice,
  125. size_t _i, size_t _j,
  126. JSonContainer::const_iterator i, JSonContainer::const_iterator j)
  127. {
  128. while (_i || _j)
  129. {
  130. if (_i && (!_j || matrice[_i][_j] > matrice[_i-1][_j]))
  131. {
  132. operations[*i] = eLevenshteinOperator::add;
  133. --i;
  134. --_i;
  135. }
  136. else if (_j && (!_i || matrice[_i][_j] > matrice[_i][_j -1]))
  137. {
  138. operations[*j] = eLevenshteinOperator::add;
  139. --j;
  140. --_j;
  141. }
  142. else if (_i && _j)
  143. {
  144. eLevenshteinOperator op =
  145. matrice[_i][_j] == matrice[_i -1][_j -1] ?
  146. eLevenshteinOperator::equ :
  147. eLevenshteinOperator::mod;
  148. operations[*i] = operations[*j] = op;
  149. for (std::pair<const JSonElement *, eLevenshteinOperator> e : subMatrice[_i -1][_j -1]->path())
  150. operations[e.first] = e.second;
  151. --i;
  152. --j;
  153. --_i;
  154. --_j;
  155. }
  156. else if (_i)
  157. {
  158. operations[*i] = eLevenshteinOperator::add;
  159. --i;
  160. --_i;
  161. }
  162. else if (_j)
  163. {
  164. operations[*j] = eLevenshteinOperator::add;
  165. --j;
  166. --_j;
  167. }
  168. }
  169. }
  170. private:
  171. LevenshteinMatrice();
  172. size_t levenDist;
  173. float levenRelativeDist;
  174. };