1
1

levenshteinMatrice.hpp 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. #pragma once
  2. #include <iostream>
  3. #include <map>
  4. #include "jsonContainer.hh"
  5. #include "levenshtein.hpp"
  6. #include "outputFlag.hh"
  7. class LevenshteinMatrice_base
  8. {
  9. public:
  10. virtual ~LevenshteinMatrice_base() {}
  11. virtual const std::map<const JSonElement*, eLevenshteinOperator> path() const;
  12. virtual size_t result() const =0;
  13. virtual bool areSimilar() const =0;
  14. eLevenshteinOperator get(const JSonElement *) const;
  15. virtual std::map<const JSonElement *, const JSonElement *> getEquivalences() const;
  16. virtual const JSonElement * getEquivalence(const JSonElement *) const;
  17. public:
  18. class Builder
  19. {
  20. public:
  21. Builder();
  22. ~Builder();
  23. LevenshteinMatrice_base *build(const JSonElement *a, const JSonElement *b) const;
  24. };
  25. protected:
  26. std::map<const JSonElement*, eLevenshteinOperator> operations;
  27. };
  28. class LevenshteinMatrice_manual: public LevenshteinMatrice_base
  29. {
  30. public:
  31. LevenshteinMatrice_manual *add(const JSonElement*, eLevenshteinOperator);
  32. size_t result() const;
  33. bool areSimilar() const;
  34. public:
  35. size_t _result;
  36. };
  37. class LevenshteinMatriceWithScore: public LevenshteinMatrice_base
  38. {
  39. public:
  40. LevenshteinMatriceWithScore(float score, const JSonElement *a, const JSonElement *b);
  41. std::map<const JSonElement *, const JSonElement *> getEquivalences() const;
  42. virtual const JSonElement * getEquivalence(const JSonElement *) const;
  43. size_t result() const;
  44. bool areSimilar() const;
  45. private:
  46. bool _result;
  47. const JSonElement *equivalentA, *equivalentB;
  48. };
  49. class LevenshteinMatrice: public LevenshteinMatrice_base
  50. {
  51. public:
  52. template<typename T>
  53. static LevenshteinMatrice *build(const JSonContainer::const_iterator aBegin, const JSonContainer::const_iterator bBegin,
  54. const size_t n, const size_t m)
  55. {
  56. LevenshteinMatrice *result = new LevenshteinMatrice();
  57. size_t i, j;
  58. JSonContainer::const_iterator a = aBegin;
  59. JSonContainer::const_iterator b;
  60. LevenshteinMatrice_base::Builder matriceBuilder;
  61. T **matrice = new T*[n +1]();
  62. LevenshteinMatrice_base ***subMatrice = new LevenshteinMatrice_base**[n]();
  63. matrice[0] = new T[m +1];
  64. for (i =0; i <= m; ++i)
  65. matrice[0][i] = i;
  66. for (i=1; i <= n; ++i)
  67. {
  68. matrice[i] = new T[m +1];
  69. matrice[i][0] = i;
  70. subMatrice[i -1] = new LevenshteinMatrice_base*[m];
  71. for (size_t j=0; j < m; ++j)
  72. subMatrice[i -1][j] = nullptr;
  73. }
  74. for (i =1; i <= n; ++i, ++a)
  75. {
  76. b = bBegin;
  77. for (j =1; j <= m; ++j, ++b)
  78. {
  79. LevenshteinMatrice_base *_subMatrice = matriceBuilder.build(*a, *b);
  80. if (_subMatrice != nullptr)
  81. {
  82. const T chCost = matrice[i -1][j -1] + (_subMatrice->areSimilar() ? 0 : _subMatrice->result());
  83. if (chCost <= matrice[i -1][j] +1 &&
  84. chCost <= matrice[i][j -1] +1)
  85. {
  86. matrice[i][j] = chCost;
  87. subMatrice[i -1][j -1] = _subMatrice;
  88. continue;
  89. }
  90. delete _subMatrice;
  91. } // Change is not worth, consider adding/removing
  92. matrice[i][j] = std::min(matrice[i -1][j], matrice[i][j -1]) +1;
  93. }
  94. }
  95. result->levenDist = matrice[n][m];
  96. result->levenRelativeDist = 1 -(matrice[n][m] / std::max(n, m));
  97. result->shortestPath<T>(matrice, subMatrice, n, m, --a, --b);
  98. cleanMatrice(matrice, subMatrice, n, m);
  99. return result;
  100. };
  101. template<typename T>
  102. static void cleanMatrice(T **matrice, LevenshteinMatrice_base ***subMatrice, const size_t &n, const size_t &m)
  103. {
  104. for (size_t i=0; i <= n; ++i)
  105. {
  106. delete []matrice[i];
  107. if (i != n)
  108. {
  109. for (size_t j=0; j < m; ++j)
  110. if (subMatrice[i][j])
  111. delete subMatrice[i][j];
  112. delete []subMatrice[i];
  113. }
  114. }
  115. delete []matrice;
  116. delete []subMatrice;
  117. };
  118. void addRoot(const JSonContainer *a, const JSonContainer *b)
  119. {
  120. if (levenRelativeDist < LEVENSHTEIN_SENSIBILITY)
  121. {
  122. operations[a] = eLevenshteinOperator::add;
  123. operations[b] = eLevenshteinOperator::add;
  124. }
  125. else
  126. {
  127. operations[a] = operations[b] = levenRelativeDist < 1.f ? eLevenshteinOperator::mod : eLevenshteinOperator::equ;
  128. equivalences[a] = b;
  129. equivalences[b] = a;
  130. }
  131. }
  132. std::map<const JSonElement*, const JSonElement *> getEquivalences() const;
  133. size_t result() const;
  134. bool areSimilar() const;
  135. const JSonElement *getEquivalence(const JSonElement *) const;
  136. private:
  137. template<typename T>
  138. void shortestPath(T **matrice,
  139. LevenshteinMatrice_base ***subMatrice,
  140. size_t _i, size_t _j,
  141. JSonContainer::const_iterator i, JSonContainer::const_iterator j)
  142. {
  143. while (_i || _j)
  144. {
  145. if (_i && (!_j || matrice[_i][_j] > matrice[_i-1][_j]))
  146. {
  147. operations[*i] = eLevenshteinOperator::add;
  148. --i;
  149. --_i;
  150. }
  151. else if (_j && (!_i || matrice[_i][_j] > matrice[_i][_j -1]))
  152. {
  153. operations[*j] = eLevenshteinOperator::add;
  154. --j;
  155. --_j;
  156. }
  157. else if (_i && _j)
  158. {
  159. eLevenshteinOperator op =
  160. matrice[_i][_j] == matrice[_i -1][_j -1] ?
  161. eLevenshteinOperator::equ :
  162. eLevenshteinOperator::mod;
  163. operations[*i] = operations[*j] = op;
  164. for (std::pair<const JSonElement *, eLevenshteinOperator> e : subMatrice[_i -1][_j -1]->path())
  165. operations[e.first] = e.second;
  166. for (std::pair<const JSonElement *, const JSonElement *> e : (subMatrice[_i -1][_j -1])->getEquivalences())
  167. equivalences[e.first] = e.second;
  168. equivalences[*i] = *j;
  169. --i;
  170. --j;
  171. --_i;
  172. --_j;
  173. }
  174. else if (_i)
  175. {
  176. operations[*i] = eLevenshteinOperator::add;
  177. --i;
  178. --_i;
  179. }
  180. else if (_j)
  181. {
  182. operations[*j] = eLevenshteinOperator::add;
  183. --j;
  184. --_j;
  185. }
  186. }
  187. }
  188. private:
  189. LevenshteinMatrice();
  190. size_t levenDist;
  191. float levenRelativeDist;
  192. std::map<const JSonElement*, const JSonElement *> equivalences;
  193. };