levenshteinMatrice.hpp 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. #pragma once
  2. #include <iostream>
  3. #include <map>
  4. #include "jsonContainer.hh"
  5. #include "levenshtein.hpp"
  6. enum eLevenshteinOperator: char
  7. {
  8. add = '+',
  9. rem = '-',
  10. mod = '!',
  11. equ = '='
  12. };
  13. class LevenshteinMatrice_base
  14. {
  15. public:
  16. virtual ~LevenshteinMatrice_base() {}
  17. const std::map<const JSonElement*, eLevenshteinOperator> path() const;
  18. virtual size_t result() const =0;
  19. virtual bool areSimilar() const =0;
  20. virtual void debug(std::ostream &out) const =0;
  21. public:
  22. class Builder
  23. {
  24. public:
  25. Builder();
  26. ~Builder();
  27. LevenshteinMatrice_base *build(const JSonElement *a, const JSonElement *b) const;
  28. };
  29. protected:
  30. std::map<const JSonElement*, eLevenshteinOperator> operations;
  31. };
  32. class LevenshteinMatrice_manual: public LevenshteinMatrice_base
  33. {
  34. public:
  35. LevenshteinMatrice_manual *add(const JSonElement*, eLevenshteinOperator);
  36. size_t result() const;
  37. bool areSimilar() const;
  38. void debug(std::ostream &out) const;
  39. public:
  40. size_t _result;
  41. };
  42. class LevenshteinMatriceWithScore: public LevenshteinMatrice_base
  43. {
  44. public:
  45. LevenshteinMatriceWithScore(float score);
  46. size_t result() const;
  47. void debug(std::ostream &out) const;
  48. bool areSimilar() const;
  49. private:
  50. bool _result;
  51. };
  52. template<typename T>
  53. class LevenshteinMatrice: public LevenshteinMatrice_base
  54. {
  55. public:
  56. LevenshteinMatrice(const JSonContainer::const_iterator aBegin, const JSonContainer::const_iterator aEnd,
  57. const JSonContainer::const_iterator bBegin, const JSonContainer::const_iterator bEnd,
  58. size_t n, size_t m)
  59. {
  60. size_t i, j;
  61. JSonContainer::const_iterator a = aBegin;
  62. JSonContainer::const_iterator b;
  63. LevenshteinMatrice_base::Builder matriceBuilder;
  64. this->n = n;
  65. this->m = m;
  66. this->matrice = new T*[n +1]();
  67. this->subMatrice = new LevenshteinMatrice_base**[n +1]();
  68. matrice[0] = new T[m +1];
  69. for (i =1; i <= m; ++i)
  70. matrice[0][i] = i;
  71. for (i=1; i <= n; ++i)
  72. {
  73. matrice[i] = new T[m +1];
  74. matrice[i][0] = i;
  75. }
  76. for (i=0; i <= n; ++i)
  77. {
  78. subMatrice[i] = new LevenshteinMatrice_base*[m +1];
  79. for (size_t j=0; j <= m; ++j)
  80. subMatrice[i][j] = nullptr;
  81. }
  82. for (i =0; a != aEnd; ++i, ++a)
  83. {
  84. b = bBegin;
  85. for (j =0; b != bEnd; ++j, ++b)
  86. {
  87. LevenshteinMatrice_base *subMatrice = matriceBuilder.build(*a, *b);
  88. if (subMatrice != nullptr)
  89. {
  90. const T chCost = get(i, j) + (subMatrice->areSimilar() ? 0 : 1);
  91. if (chCost <= get(i, j +1) +1 && chCost <= get(i +1, j))
  92. {
  93. matrice[i +1][j +1] = chCost;
  94. this->subMatrice[i +1][j +1] = subMatrice;
  95. continue;
  96. }
  97. delete subMatrice;
  98. } // Change is not worth or subMatrice is null (eg. a and b has different types)
  99. matrice[i +1][j +1] = std::min(get(i, j +1), get(i +1, j)) +1;
  100. }
  101. }
  102. };
  103. ~LevenshteinMatrice()
  104. {
  105. for (size_t i=0; i <= n; ++i)
  106. {
  107. delete []matrice[i];
  108. for (size_t j=0; j <= m; ++j)
  109. if (subMatrice[i][j])
  110. delete subMatrice[i][j];
  111. delete []subMatrice[i];
  112. }
  113. delete []matrice;
  114. delete []subMatrice;
  115. };
  116. void prune()
  117. {
  118. //TODO
  119. }
  120. T get(size_t a, size_t b) const
  121. {
  122. return matrice[a][b];
  123. };
  124. std::list<eLevenshteinOperator> shortestPath() const
  125. {
  126. std::list<eLevenshteinOperator> result;
  127. size_t i = n;
  128. size_t j = m;
  129. while (i || j)
  130. {
  131. if (i && (!j || matrice[i][j] > matrice[i-1][j]))
  132. {
  133. result.push_front(eLevenshteinOperator::add);
  134. --i;
  135. }
  136. else if (j && (!i || matrice[i][j] > matrice[i][j -1]))
  137. {
  138. result.push_front(eLevenshteinOperator::rem);
  139. --j;
  140. }
  141. else if (i && j)
  142. {
  143. result.push_front(matrice[i][j] == matrice[i-1][j-1] ? eLevenshteinOperator::equ : eLevenshteinOperator::mod);
  144. --i;
  145. --j;
  146. }
  147. else if (i)
  148. {
  149. result.push_front(eLevenshteinOperator::add);
  150. --i;
  151. }
  152. else if (j)
  153. {
  154. result.push_front(eLevenshteinOperator::rem);
  155. --j;
  156. }
  157. }
  158. return result;
  159. }
  160. void debug(std::ostream &o) const
  161. {
  162. for (size_t i =0; i <= n; ++i)
  163. {
  164. for (size_t j=0; j <= m; ++j)
  165. o << (int) (matrice[n][m]) << '\t';
  166. o << std::endl;
  167. }
  168. }
  169. size_t result() const
  170. {
  171. return (size_t) matrice[n][m];
  172. };
  173. bool areSimilar() const
  174. {
  175. float levenRelativeDist = 1 -(result() / std::max(n, m));
  176. return levenRelativeDist > LEVENSHTEIN_SENSIBILITY;
  177. }
  178. private:
  179. T **matrice;
  180. /**
  181. * Usefull only on `modify' operation
  182. **/
  183. LevenshteinMatrice_base ***subMatrice;
  184. size_t n;
  185. size_t m;
  186. };