1
1

levenshteinMatrice.hpp 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. #pragma once
  2. #include <iostream>
  3. #include <map>
  4. #include "jsonContainer.hh"
  5. #include "levenshtein.hpp"
  6. enum eLevenshteinOperator: char
  7. {
  8. add = '+',
  9. rem = '-',
  10. mod = '!',
  11. equ = '='
  12. };
  13. class LevenshteinMatrice_base
  14. {
  15. public:
  16. virtual ~LevenshteinMatrice_base() {}
  17. virtual const std::map<const JSonElement*, eLevenshteinOperator> path() const;
  18. virtual size_t result() const =0;
  19. virtual bool areSimilar() const =0;
  20. eLevenshteinOperator get(const JSonElement *) const;
  21. virtual std::map<const JSonElement *, const JSonElement *> getEquivalences() const;
  22. virtual const JSonElement * getEquivalence(const JSonElement *) const;
  23. public:
  24. class Builder
  25. {
  26. public:
  27. Builder();
  28. ~Builder();
  29. LevenshteinMatrice_base *build(const JSonElement *a, const JSonElement *b) const;
  30. };
  31. protected:
  32. std::map<const JSonElement*, eLevenshteinOperator> operations;
  33. };
  34. class LevenshteinMatrice_manual: public LevenshteinMatrice_base
  35. {
  36. public:
  37. LevenshteinMatrice_manual *add(const JSonElement*, eLevenshteinOperator);
  38. size_t result() const;
  39. bool areSimilar() const;
  40. public:
  41. size_t _result;
  42. };
  43. class LevenshteinMatriceWithScore: public LevenshteinMatrice_base
  44. {
  45. public:
  46. LevenshteinMatriceWithScore(float score, const JSonElement *a, const JSonElement *b);
  47. std::map<const JSonElement *, const JSonElement *> getEquivalences() const;
  48. virtual const JSonElement * getEquivalence(const JSonElement *) const;
  49. size_t result() const;
  50. bool areSimilar() const;
  51. private:
  52. bool _result;
  53. const JSonElement *equivalentA, *equivalentB;
  54. };
  55. class LevenshteinMatrice: public LevenshteinMatrice_base
  56. {
  57. public:
  58. template<typename T>
  59. static LevenshteinMatrice *build(const JSonContainer::const_iterator aBegin, const JSonContainer::const_iterator bBegin,
  60. const size_t n, const size_t m)
  61. {
  62. LevenshteinMatrice *result = new LevenshteinMatrice();
  63. size_t i, j;
  64. JSonContainer::const_iterator a = aBegin;
  65. JSonContainer::const_iterator b;
  66. LevenshteinMatrice_base::Builder matriceBuilder;
  67. T **matrice = new T*[n +1]();
  68. LevenshteinMatrice_base ***subMatrice = new LevenshteinMatrice_base**[n]();
  69. matrice[0] = new T[m +1];
  70. for (i =0; i <= m; ++i)
  71. matrice[0][i] = i;
  72. for (i=1; i <= n; ++i)
  73. {
  74. matrice[i] = new T[m +1];
  75. matrice[i][0] = i;
  76. subMatrice[i -1] = new LevenshteinMatrice_base*[m];
  77. for (size_t j=0; j < m; ++j)
  78. subMatrice[i -1][j] = nullptr;
  79. }
  80. for (i =1; i <= n; ++i, ++a)
  81. {
  82. b = bBegin;
  83. for (j =1; j <= m; ++j, ++b)
  84. {
  85. LevenshteinMatrice_base *_subMatrice = matriceBuilder.build(*a, *b);
  86. if (_subMatrice != nullptr)
  87. {
  88. const T chCost = matrice[i -1][j -1] + (_subMatrice->areSimilar() ? 0 : _subMatrice->result());
  89. if (chCost <= matrice[i -1][j] +1 &&
  90. chCost <= matrice[i][j -1] +1)
  91. {
  92. matrice[i][j] = chCost;
  93. subMatrice[i -1][j -1] = _subMatrice;
  94. continue;
  95. }
  96. delete _subMatrice;
  97. } // Change is not worth, consider adding/removing
  98. matrice[i][j] = std::min(matrice[i -1][j], matrice[i][j -1]) +1;
  99. }
  100. }
  101. result->levenDist = matrice[n][m];
  102. result->levenRelativeDist = 1 -(matrice[n][m] / std::max(n, m));
  103. result->shortestPath<T>(matrice, subMatrice, n, m, --a, --b);
  104. cleanMatrice(matrice, subMatrice, n, m);
  105. return result;
  106. };
  107. template<typename T>
  108. static void cleanMatrice(T **matrice, LevenshteinMatrice_base ***subMatrice, const size_t &n, const size_t &m)
  109. {
  110. for (size_t i=0; i <= n; ++i)
  111. {
  112. delete []matrice[i];
  113. if (i != n)
  114. {
  115. for (size_t j=0; j < m; ++j)
  116. if (subMatrice[i][j])
  117. delete subMatrice[i][j];
  118. delete []subMatrice[i];
  119. }
  120. }
  121. delete []matrice;
  122. delete []subMatrice;
  123. };
  124. std::map<const JSonElement*, const JSonElement *> getEquivalences() const;
  125. size_t result() const;
  126. bool areSimilar() const;
  127. const JSonElement *getEquivalence(const JSonElement *) const;
  128. private:
  129. template<typename T>
  130. void shortestPath(T **matrice,
  131. LevenshteinMatrice_base ***subMatrice,
  132. size_t _i, size_t _j,
  133. JSonContainer::const_iterator i, JSonContainer::const_iterator j)
  134. {
  135. while (_i || _j)
  136. {
  137. if (_i && (!_j || matrice[_i][_j] > matrice[_i-1][_j]))
  138. {
  139. operations[*i] = eLevenshteinOperator::add;
  140. --i;
  141. --_i;
  142. }
  143. else if (_j && (!_i || matrice[_i][_j] > matrice[_i][_j -1]))
  144. {
  145. operations[*j] = eLevenshteinOperator::add;
  146. --j;
  147. --_j;
  148. }
  149. else if (_i && _j)
  150. {
  151. eLevenshteinOperator op =
  152. matrice[_i][_j] == matrice[_i -1][_j -1] ?
  153. eLevenshteinOperator::equ :
  154. eLevenshteinOperator::mod;
  155. operations[*i] = operations[*j] = op;
  156. for (std::pair<const JSonElement *, eLevenshteinOperator> e : subMatrice[_i -1][_j -1]->path())
  157. operations[e.first] = e.second;
  158. for (std::pair<const JSonElement *, const JSonElement *> e : (subMatrice[_i -1][_j -1])->getEquivalences())
  159. equivalences[e.first] = e.second;
  160. equivalences[*i] = *j;
  161. --i;
  162. --j;
  163. --_i;
  164. --_j;
  165. }
  166. else if (_i)
  167. {
  168. operations[*i] = eLevenshteinOperator::add;
  169. --i;
  170. --_i;
  171. }
  172. else if (_j)
  173. {
  174. operations[*j] = eLevenshteinOperator::add;
  175. --j;
  176. --_j;
  177. }
  178. }
  179. }
  180. private:
  181. LevenshteinMatrice();
  182. size_t levenDist;
  183. float levenRelativeDist;
  184. std::map<const JSonElement*, const JSonElement *> equivalences;
  185. };