levenshtein.cpp 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. #include <climits>
  2. #include "levenshteinMatrice.hpp"
  3. #include "jsonObjectEntry.hh"
  4. size_t levenshtein(const std::string &a, const std::string &b)
  5. {
  6. int **matrice = new int*[a.size() +1]();
  7. matrice[0] = new int[b.size() +1]();
  8. for (size_t j=0; j <= b.size(); j++)
  9. matrice[0][j] = j;
  10. for (size_t i=1; i <= a.size(); ++i)
  11. {
  12. matrice[i] = new int[b.size() +1]();
  13. matrice[i][0] = i;
  14. for (size_t j=1; j <= b.size(); ++j)
  15. matrice[i][j] = std::min(std::min(
  16. matrice[i -1][j] +1,
  17. matrice[i][j -1] +1),
  18. matrice[i -1][j -1] + (a[i -1] == b[j -1] ? 0 : 1));
  19. }
  20. const size_t result = matrice[a.size()][b.size()];
  21. for (size_t i=0; i <= a.size(); ++i)
  22. delete []matrice[i];
  23. delete[] matrice;
  24. return result;
  25. }
  26. float levenshteinPercent(const std::string &a, const std::string &b)
  27. {
  28. if (a.empty() && b.empty())
  29. return 1.f;
  30. return 1 - (levenshtein(a, b) / std::max(a.size(), b.size()));
  31. }
  32. /**
  33. * Levenshtein Matrice Builder stuff
  34. **/
  35. LevenshteinMatrice_base::Builder::Builder()
  36. { }
  37. LevenshteinMatrice_base::Builder::~Builder()
  38. { }
  39. LevenshteinMatrice_base *LevenshteinMatrice_base::Builder::build(const JSonElement *a, const JSonElement *b) const
  40. {
  41. const bool aIsContainer = ((dynamic_cast<const JSonContainer*>(a)) != nullptr);
  42. const bool bIsContainer = ((dynamic_cast<const JSonContainer*>(b)) != nullptr);
  43. if (aIsContainer && bIsContainer)
  44. {
  45. const size_t lenA = ((const JSonContainer*) a)->size();
  46. const size_t lenB = ((const JSonContainer*) b)->size();
  47. const JSonContainer::const_iterator aBegin = ((const JSonContainer*)a)->cbegin();
  48. const JSonContainer::const_iterator bBegin = ((const JSonContainer*)b)->cbegin();
  49. LevenshteinMatrice *result = nullptr;
  50. if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
  51. result = LevenshteinMatrice::build<unsigned char>(aBegin, bBegin, lenA, lenB);
  52. else if (lenA < USHRT_MAX && lenB < USHRT_MAX)
  53. result = LevenshteinMatrice::build<unsigned short>(aBegin, bBegin, lenA, lenB);
  54. else
  55. result = LevenshteinMatrice::build<unsigned int>(aBegin, bBegin, lenA, lenB);
  56. result->addRoot((const JSonContainer *)a, (const JSonContainer *)b);
  57. return result;
  58. }
  59. else if (aIsContainer)
  60. {
  61. LevenshteinMatrice_manual *result = new LevenshteinMatrice_manual();
  62. result->_result = ((JSonContainer*)a)->size() +1; //TODO recursive number of all descendants
  63. return result->add(a, eLevenshteinOperator::rem)
  64. ->add(b, eLevenshteinOperator::add);
  65. }
  66. else if (bIsContainer)
  67. {
  68. LevenshteinMatrice_manual *result = new LevenshteinMatrice_manual();
  69. result->_result = ((JSonContainer*)b)->size() +1; //TODO recursive number of all descendants
  70. return result->add(b, eLevenshteinOperator::rem)
  71. ->add(a, eLevenshteinOperator::add);
  72. }
  73. else
  74. {
  75. const bool aIsObject = ((dynamic_cast<const JSonObjectEntry*>(a)) != nullptr);
  76. const bool bIsObject = ((dynamic_cast<const JSonObjectEntry*>(b)) != nullptr);
  77. if (aIsObject && bIsObject)
  78. {
  79. if (a->stringify() == b->stringify() && (*(const JSonObjectEntry&)(*a))->stringify() == (*(const JSonObjectEntry&)(*b))->stringify())
  80. return LevenshteinMatrice_base::Builder::build(*(const JSonObjectEntry&)(*a), *(const JSonObjectEntry&)(*b));
  81. return new LevenshteinMatriceWithScore(0.f, a, b);
  82. }
  83. else if (aIsObject || bIsObject)
  84. return new LevenshteinMatriceWithScore(0.f, a, b);
  85. return new LevenshteinMatriceWithScore(levenshteinPercent(a->stringify(), b->stringify()), a, b);
  86. }
  87. }
  88. eLevenshteinOperator LevenshteinMatrice_base::get(const JSonElement *e) const
  89. {
  90. return operations.at(e);
  91. }
  92. const JSonElement *LevenshteinMatrice_base::getEquivalence(const JSonElement *) const
  93. { return nullptr; }
  94. /**
  95. * base (generic) Matrice
  96. **/
  97. const std::map<const JSonElement*, eLevenshteinOperator> LevenshteinMatrice_base::path() const
  98. { return operations; }
  99. std::map<const JSonElement*, const JSonElement *> LevenshteinMatrice_base::getEquivalences() const
  100. { return std::map<const JSonElement*, const JSonElement *>(); }
  101. /**
  102. * Normal matrice
  103. **/
  104. LevenshteinMatrice::LevenshteinMatrice()
  105. { }
  106. size_t LevenshteinMatrice::result() const
  107. { return levenDist; }
  108. bool LevenshteinMatrice::areSimilar() const
  109. { return levenRelativeDist > LEVENSHTEIN_SENSIBILITY; }
  110. const JSonElement *LevenshteinMatrice::getEquivalence(const JSonElement *e) const
  111. {
  112. for (std::pair<const JSonElement *, const JSonElement *> it : equivalences)
  113. {
  114. if (it.first == e)
  115. return it.second;
  116. if (it.second == e)
  117. return it.first;
  118. }
  119. return nullptr;
  120. }
  121. std::map<const JSonElement*, const JSonElement *> LevenshteinMatrice::getEquivalences() const
  122. { return equivalences; }
  123. /**
  124. * Manual matrice
  125. **/
  126. LevenshteinMatrice_manual *LevenshteinMatrice_manual::add(const JSonElement *a, eLevenshteinOperator b)
  127. {
  128. operations[a] = b;
  129. return this;
  130. }
  131. size_t LevenshteinMatrice_manual::result() const
  132. {
  133. return _result;
  134. }
  135. bool LevenshteinMatrice_manual::areSimilar() const
  136. {
  137. return false;
  138. }
  139. /**
  140. * Score matrice
  141. **/
  142. LevenshteinMatriceWithScore::LevenshteinMatriceWithScore(float s, const JSonElement *a, const JSonElement *b)
  143. {
  144. _result = s > LEVENSHTEIN_SENSIBILITY;
  145. if (_result)
  146. {
  147. equivalentA = a;
  148. equivalentB = b;
  149. }
  150. else
  151. equivalentA = equivalentB = nullptr;
  152. }
  153. const JSonElement * LevenshteinMatriceWithScore::getEquivalence(const JSonElement *a) const
  154. {
  155. if (equivalentA && equivalentB && equivalentA == a)
  156. return equivalentB;
  157. return nullptr;
  158. }
  159. std::map<const JSonElement *, const JSonElement *> LevenshteinMatriceWithScore::getEquivalences() const
  160. {
  161. std::map<const JSonElement*, const JSonElement *> res;
  162. if (equivalentA && equivalentB)
  163. res[equivalentA] = equivalentB;
  164. return res;
  165. }
  166. size_t LevenshteinMatriceWithScore::result() const
  167. {
  168. return _result ? 0 : 1;
  169. }
  170. bool LevenshteinMatriceWithScore::areSimilar() const
  171. {
  172. return _result;
  173. }