levenshteinMatrice.hpp 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. #pragma once
  2. #include <iostream>
  3. #include <map>
  4. #include "jsonContainer.hh"
  5. #include "levenshtein.hpp"
  6. #include "outputFlag.hh"
  7. class LevenshteinMatrice_base
  8. {
  9. public:
  10. virtual ~LevenshteinMatrice_base() {}
  11. virtual const std::map<const JSonElement*, eLevenshteinOperator> path() const;
  12. virtual size_t result() const =0;
  13. virtual bool areSimilar() const =0;
  14. eLevenshteinOperator get(const JSonElement *) const;
  15. virtual std::map<const JSonElement *, const JSonElement *> getEquivalences() const;
  16. virtual const JSonElement * getEquivalence(const JSonElement *) const;
  17. public:
  18. class Builder
  19. {
  20. public:
  21. Builder();
  22. ~Builder();
  23. LevenshteinMatrice_base *build(const JSonElement *a, const JSonElement *b) const;
  24. };
  25. protected:
  26. std::map<const JSonElement*, eLevenshteinOperator> operations;
  27. };
  28. class LevenshteinMatrice_manual: public LevenshteinMatrice_base
  29. {
  30. public:
  31. LevenshteinMatrice_manual *add(const JSonElement*, eLevenshteinOperator);
  32. size_t result() const;
  33. bool areSimilar() const;
  34. public:
  35. size_t _result;
  36. };
  37. class LevenshteinMatriceWithScore: public LevenshteinMatrice_base
  38. {
  39. public:
  40. LevenshteinMatriceWithScore(float score, const JSonElement *a, const JSonElement *b);
  41. LevenshteinMatriceWithScore(float score, const JSonElement *a, const JSonElement *b, bool sameType);
  42. std::map<const JSonElement *, const JSonElement *> getEquivalences() const;
  43. virtual const JSonElement * getEquivalence(const JSonElement *) const;
  44. size_t result() const;
  45. bool areSimilar() const;
  46. private:
  47. bool _result;
  48. const JSonElement *equivalentA, *equivalentB;
  49. };
  50. class LevenshteinMatrice: public LevenshteinMatrice_base
  51. {
  52. public:
  53. template<typename T>
  54. static LevenshteinMatrice *build(const JSonContainer::const_iterator aBegin, const JSonContainer::const_iterator bBegin,
  55. const size_t n, const size_t m)
  56. {
  57. LevenshteinMatrice *result = new LevenshteinMatrice();
  58. size_t i, j;
  59. JSonContainer::const_iterator a = aBegin;
  60. JSonContainer::const_iterator b;
  61. LevenshteinMatrice_base::Builder matriceBuilder;
  62. T **matrice = new T*[n +1]();
  63. LevenshteinMatrice_base ***subMatrice = new LevenshteinMatrice_base**[n]();
  64. matrice[0] = new T[m +1];
  65. for (i =0; i <= m; ++i)
  66. matrice[0][i] = i;
  67. for (i=1; i <= n; ++i)
  68. {
  69. matrice[i] = new T[m +1];
  70. matrice[i][0] = i;
  71. subMatrice[i -1] = new LevenshteinMatrice_base*[m];
  72. for (size_t j=0; j < m; ++j)
  73. subMatrice[i -1][j] = nullptr;
  74. }
  75. for (i =1; i <= n; ++i, ++a)
  76. {
  77. b = bBegin;
  78. for (j =1; j <= m; ++j, ++b)
  79. {
  80. LevenshteinMatrice_base *_subMatrice = matriceBuilder.build(*a, *b);
  81. if (_subMatrice != nullptr)
  82. {
  83. const T chCost = matrice[i -1][j -1] + (_subMatrice->areSimilar() ? 0 : _subMatrice->result());
  84. if (chCost <= matrice[i -1][j] +1 &&
  85. chCost <= matrice[i][j -1] +1)
  86. {
  87. matrice[i][j] = chCost;
  88. subMatrice[i -1][j -1] = _subMatrice;
  89. continue;
  90. }
  91. delete _subMatrice;
  92. } // Change is not worth, consider adding/removing
  93. matrice[i][j] = std::min(matrice[i -1][j], matrice[i][j -1]) +1;
  94. }
  95. }
  96. result->levenDist = matrice[n][m];
  97. result->levenRelativeDist = 1 -(matrice[n][m] / std::max(n, m));
  98. if (result->levenRelativeDist > LEVENSHTEIN_SENSIBILITY)
  99. result->shortestPath<T>(matrice, subMatrice, n, m, --a, --b);
  100. cleanMatrice(matrice, subMatrice, n, m);
  101. return result;
  102. };
  103. template<typename T>
  104. static void cleanMatrice(T **matrice, LevenshteinMatrice_base ***subMatrice, const size_t &n, const size_t &m)
  105. {
  106. for (size_t i=0; i <= n; ++i)
  107. {
  108. delete []matrice[i];
  109. if (i != n)
  110. {
  111. for (size_t j=0; j < m; ++j)
  112. if (subMatrice[i][j])
  113. delete subMatrice[i][j];
  114. delete []subMatrice[i];
  115. }
  116. }
  117. delete []matrice;
  118. delete []subMatrice;
  119. };
  120. void addRoot(const JSonContainer *a, const JSonContainer *b)
  121. {
  122. if (levenRelativeDist < LEVENSHTEIN_SENSIBILITY)
  123. {
  124. operations[a] = eLevenshteinOperator::add;
  125. operations[b] = eLevenshteinOperator::add;
  126. }
  127. else
  128. {
  129. operations[a] = operations[b] = levenRelativeDist < 1.f ? eLevenshteinOperator::mod : eLevenshteinOperator::equ;
  130. equivalences[a] = b;
  131. equivalences[b] = a;
  132. }
  133. }
  134. std::map<const JSonElement*, const JSonElement *> getEquivalences() const;
  135. size_t result() const;
  136. bool areSimilar() const;
  137. const JSonElement *getEquivalence(const JSonElement *) const;
  138. private:
  139. template<typename T>
  140. void shortestPath(T **matrice,
  141. LevenshteinMatrice_base ***subMatrice,
  142. size_t _i, size_t _j,
  143. JSonContainer::const_iterator i, JSonContainer::const_iterator j)
  144. {
  145. while (_i || _j)
  146. {
  147. if (_i && (!_j || matrice[_i][_j] > matrice[_i-1][_j]))
  148. {
  149. operations[*i] = eLevenshteinOperator::add;
  150. --i;
  151. --_i;
  152. }
  153. else if (_j && (!_i || matrice[_i][_j] > matrice[_i][_j -1]))
  154. {
  155. operations[*j] = eLevenshteinOperator::add;
  156. --j;
  157. --_j;
  158. }
  159. else if (_i && _j)
  160. {
  161. eLevenshteinOperator op =
  162. matrice[_i][_j] == matrice[_i -1][_j -1] ?
  163. eLevenshteinOperator::equ :
  164. eLevenshteinOperator::mod;
  165. operations[*i] = operations[*j] = op;
  166. for (std::pair<const JSonElement *, eLevenshteinOperator> e : subMatrice[_i -1][_j -1]->path())
  167. operations[e.first] = e.second;
  168. for (std::pair<const JSonElement *, const JSonElement *> e : (subMatrice[_i -1][_j -1])->getEquivalences())
  169. equivalences[e.first] = e.second;
  170. equivalences[*i] = *j;
  171. --i;
  172. --j;
  173. --_i;
  174. --_j;
  175. }
  176. else if (_i)
  177. {
  178. operations[*i] = eLevenshteinOperator::add;
  179. --i;
  180. --_i;
  181. }
  182. else if (_j)
  183. {
  184. operations[*j] = eLevenshteinOperator::add;
  185. --j;
  186. --_j;
  187. }
  188. }
  189. }
  190. private:
  191. LevenshteinMatrice();
  192. size_t levenDist;
  193. float levenRelativeDist;
  194. std::map<const JSonElement*, const JSonElement *> equivalences;
  195. };