levenshteinMatrice.hpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. #pragma once
  2. #include <iostream>
  3. #include <map>
  4. #include "jsonContainer.hh"
  5. #include "levenshtein.hpp"
  6. enum eLevenshteinOperator: char
  7. {
  8. add = '+',
  9. rem = '-',
  10. mod = '!',
  11. equ = '='
  12. };
  13. class LevenshteinMatrice_base
  14. {
  15. public:
  16. virtual ~LevenshteinMatrice_base() {}
  17. virtual const std::map<const JSonElement*, eLevenshteinOperator> path() const;
  18. virtual size_t result() const =0;
  19. virtual bool areSimilar() const =0;
  20. eLevenshteinOperator get(const JSonElement *) const;
  21. virtual std::map<const JSonElement *, const JSonElement *> getEquivalences() const;
  22. virtual const JSonElement * getEquivalence(const JSonElement *) const;
  23. public:
  24. class Builder
  25. {
  26. public:
  27. Builder();
  28. ~Builder();
  29. LevenshteinMatrice_base *build(const JSonElement *a, const JSonElement *b) const;
  30. };
  31. protected:
  32. std::map<const JSonElement*, eLevenshteinOperator> operations;
  33. };
  34. class LevenshteinMatrice_manual: public LevenshteinMatrice_base
  35. {
  36. public:
  37. LevenshteinMatrice_manual *add(const JSonElement*, eLevenshteinOperator);
  38. size_t result() const;
  39. bool areSimilar() const;
  40. public:
  41. size_t _result;
  42. };
  43. class LevenshteinMatriceWithScore: public LevenshteinMatrice_base
  44. {
  45. public:
  46. LevenshteinMatriceWithScore(float score, const JSonElement *a, const JSonElement *b);
  47. std::map<const JSonElement *, const JSonElement *> getEquivalences() const;
  48. virtual const JSonElement * getEquivalence(const JSonElement *) const;
  49. size_t result() const;
  50. bool areSimilar() const;
  51. private:
  52. bool _result;
  53. const JSonElement *equivalentA, *equivalentB;
  54. };
  55. class LevenshteinMatrice: public LevenshteinMatrice_base
  56. {
  57. public:
  58. template<typename T>
  59. static LevenshteinMatrice *build(const JSonContainer::const_iterator aBegin, const JSonContainer::const_iterator bBegin,
  60. const size_t n, const size_t m)
  61. {
  62. LevenshteinMatrice *result = new LevenshteinMatrice();
  63. size_t i, j;
  64. JSonContainer::const_iterator a = aBegin;
  65. JSonContainer::const_iterator b;
  66. LevenshteinMatrice_base::Builder matriceBuilder;
  67. T **matrice = new T*[n +1]();
  68. LevenshteinMatrice_base ***subMatrice = new LevenshteinMatrice_base**[n]();
  69. matrice[0] = new T[m +1];
  70. for (i =0; i <= m; ++i)
  71. matrice[0][i] = i;
  72. for (i=1; i <= n; ++i)
  73. {
  74. matrice[i] = new T[m +1];
  75. matrice[i][0] = i;
  76. subMatrice[i -1] = new LevenshteinMatrice_base*[m];
  77. for (size_t j=0; j < m; ++j)
  78. subMatrice[i -1][j] = nullptr;
  79. }
  80. for (i =1; i <= n; ++i, ++a)
  81. {
  82. b = bBegin;
  83. for (j =1; j <= m; ++j, ++b)
  84. {
  85. LevenshteinMatrice_base *_subMatrice = matriceBuilder.build(*a, *b);
  86. if (_subMatrice != nullptr)
  87. {
  88. const T chCost = matrice[i -1][j -1] + (_subMatrice->areSimilar() ? 0 : _subMatrice->result());
  89. if (chCost <= matrice[i -1][j] +1 &&
  90. chCost <= matrice[i][j -1] +1)
  91. {
  92. matrice[i][j] = chCost;
  93. subMatrice[i -1][j -1] = _subMatrice;
  94. continue;
  95. }
  96. delete _subMatrice;
  97. } // Change is not worth, consider adding/removing
  98. matrice[i][j] = std::min(matrice[i -1][j], matrice[i][j -1]) +1;
  99. }
  100. }
  101. result->levenDist = matrice[n][m];
  102. result->levenRelativeDist = 1 -(matrice[n][m] / std::max(n, m));
  103. result->shortestPath<T>(matrice, subMatrice, n, m, --a, --b);
  104. cleanMatrice(matrice, subMatrice, n, m);
  105. return result;
  106. };
  107. template<typename T>
  108. static void cleanMatrice(T **matrice, LevenshteinMatrice_base ***subMatrice, const size_t &n, const size_t &m)
  109. {
  110. for (size_t i=0; i <= n; ++i)
  111. {
  112. delete []matrice[i];
  113. if (i != n)
  114. {
  115. for (size_t j=0; j < m; ++j)
  116. if (subMatrice[i][j])
  117. delete subMatrice[i][j];
  118. delete []subMatrice[i];
  119. }
  120. }
  121. delete []matrice;
  122. delete []subMatrice;
  123. };
  124. void addRoot(const JSonContainer *a, const JSonContainer *b)
  125. {
  126. if (levenRelativeDist < LEVENSHTEIN_SENSIBILITY)
  127. {
  128. operations[a] = eLevenshteinOperator::add;
  129. operations[b] = eLevenshteinOperator::add;
  130. }
  131. else
  132. {
  133. operations[a] = operations[b] = levenRelativeDist < 1.f ? eLevenshteinOperator::mod : eLevenshteinOperator::equ;
  134. equivalences[a] = b;
  135. equivalences[b] = a;
  136. }
  137. }
  138. std::map<const JSonElement*, const JSonElement *> getEquivalences() const;
  139. size_t result() const;
  140. bool areSimilar() const;
  141. const JSonElement *getEquivalence(const JSonElement *) const;
  142. private:
  143. template<typename T>
  144. void shortestPath(T **matrice,
  145. LevenshteinMatrice_base ***subMatrice,
  146. size_t _i, size_t _j,
  147. JSonContainer::const_iterator i, JSonContainer::const_iterator j)
  148. {
  149. while (_i || _j)
  150. {
  151. if (_i && (!_j || matrice[_i][_j] > matrice[_i-1][_j]))
  152. {
  153. operations[*i] = eLevenshteinOperator::add;
  154. --i;
  155. --_i;
  156. }
  157. else if (_j && (!_i || matrice[_i][_j] > matrice[_i][_j -1]))
  158. {
  159. operations[*j] = eLevenshteinOperator::add;
  160. --j;
  161. --_j;
  162. }
  163. else if (_i && _j)
  164. {
  165. eLevenshteinOperator op =
  166. matrice[_i][_j] == matrice[_i -1][_j -1] ?
  167. eLevenshteinOperator::equ :
  168. eLevenshteinOperator::mod;
  169. operations[*i] = operations[*j] = op;
  170. for (std::pair<const JSonElement *, eLevenshteinOperator> e : subMatrice[_i -1][_j -1]->path())
  171. operations[e.first] = e.second;
  172. for (std::pair<const JSonElement *, const JSonElement *> e : (subMatrice[_i -1][_j -1])->getEquivalences())
  173. equivalences[e.first] = e.second;
  174. equivalences[*i] = *j;
  175. --i;
  176. --j;
  177. --_i;
  178. --_j;
  179. }
  180. else if (_i)
  181. {
  182. operations[*i] = eLevenshteinOperator::add;
  183. --i;
  184. --_i;
  185. }
  186. else if (_j)
  187. {
  188. operations[*j] = eLevenshteinOperator::add;
  189. --j;
  190. --_j;
  191. }
  192. }
  193. }
  194. private:
  195. LevenshteinMatrice();
  196. size_t levenDist;
  197. float levenRelativeDist;
  198. std::map<const JSonElement*, const JSonElement *> equivalences;
  199. };