levenshtein.cpp 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. #include <climits>
  2. #include "levenshteinMatrice.hpp"
  3. #include "jsonObjectEntry.hh"
  4. size_t levenshtein(const std::string &a, const std::string &b)
  5. {
  6. int **matrice = new int*[a.size() +1]();
  7. matrice[0] = new int[b.size() +1]();
  8. for (size_t j=0; j <= b.size(); j++)
  9. matrice[0][j] = j;
  10. for (size_t i=1; i <= a.size(); ++i)
  11. {
  12. matrice[i] = new int[b.size() +1]();
  13. matrice[i][0] = i;
  14. for (size_t j=1; j <= b.size(); ++j)
  15. matrice[i][j] = std::min(std::min(
  16. matrice[i -1][j] +1,
  17. matrice[i][j -1] +1),
  18. matrice[i -1][j -1] + (a[i -1] == b[j -1] ? 0 : 1));
  19. }
  20. const size_t result = matrice[a.size()][b.size()];
  21. for (size_t i=0; i <= a.size(); ++i)
  22. delete []matrice[i];
  23. delete[] matrice;
  24. return result;
  25. }
  26. float levenshteinPercent(const std::string &a, const std::string &b)
  27. {
  28. if (a.empty() && b.empty())
  29. return 1.f;
  30. return 1 - (levenshtein(a, b) / std::max(a.size(), b.size()));
  31. }
  32. /**
  33. * Levenshtein Matrice Builder stuff
  34. **/
  35. LevenshteinMatrice_base::Builder::Builder()
  36. { }
  37. LevenshteinMatrice_base::Builder::~Builder()
  38. { }
  39. LevenshteinMatrice_base *LevenshteinMatrice_base::Builder::build(const JSonElement *a, const JSonElement *b) const
  40. {
  41. const bool aIsContainer = ((dynamic_cast<const JSonContainer*>(a)) != nullptr);
  42. const bool bIsContainer = ((dynamic_cast<const JSonContainer*>(b)) != nullptr);
  43. if (aIsContainer && bIsContainer)
  44. {
  45. const size_t lenA = ((const JSonContainer*) a)->size();
  46. const size_t lenB = ((const JSonContainer*) b)->size();
  47. const JSonContainer::const_iterator aBegin = ((const JSonContainer*)a)->cbegin();
  48. const JSonContainer::const_iterator bBegin = ((const JSonContainer*)b)->cbegin();
  49. LevenshteinMatrice *result = nullptr;
  50. if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
  51. result = LevenshteinMatrice::build<unsigned char>(aBegin, bBegin, lenA, lenB);
  52. else if (lenA < USHRT_MAX && lenB < USHRT_MAX)
  53. result = LevenshteinMatrice::build<unsigned short>(aBegin, bBegin, lenA, lenB);
  54. else
  55. result = LevenshteinMatrice::build<unsigned int>(aBegin, bBegin, lenA, lenB);
  56. result->addRoot((const JSonContainer *)a, (const JSonContainer *)b);
  57. return result;
  58. }
  59. else if (aIsContainer)
  60. {
  61. LevenshteinMatrice_manual *result = new LevenshteinMatrice_manual();
  62. result->_result = ((JSonContainer*)a)->size() +1; //TODO recursive number of all descendants
  63. return result->add(a, eLevenshteinOperator::rem)
  64. ->add(b, eLevenshteinOperator::add);
  65. }
  66. else if (bIsContainer)
  67. {
  68. LevenshteinMatrice_manual *result = new LevenshteinMatrice_manual();
  69. result->_result = ((JSonContainer*)b)->size() +1; //TODO recursive number of all descendants
  70. return result->add(b, eLevenshteinOperator::rem)
  71. ->add(a, eLevenshteinOperator::add);
  72. }
  73. else
  74. {
  75. const bool aIsObject = ((dynamic_cast<const JSonObjectEntry*>(a)) != nullptr);
  76. const bool bIsObject = ((dynamic_cast<const JSonObjectEntry*>(b)) != nullptr);
  77. float result = levenshteinPercent(a->stringify(), b->stringify());
  78. if (aIsObject && bIsObject) {
  79. result *= levenshteinPercent((*(const JSonObjectEntry&)(*a))->stringify(), (*(const JSonObjectEntry&)(*b))->stringify());
  80. }
  81. return new LevenshteinMatriceWithScore(result, a, b);
  82. }
  83. }
  84. eLevenshteinOperator LevenshteinMatrice_base::get(const JSonElement *e) const
  85. {
  86. return operations.at(e);
  87. }
  88. const JSonElement *LevenshteinMatrice_base::getEquivalence(const JSonElement *) const
  89. { return nullptr; }
  90. /**
  91. * base (generic) Matrice
  92. **/
  93. const std::map<const JSonElement*, eLevenshteinOperator> LevenshteinMatrice_base::path() const
  94. { return operations; }
  95. std::map<const JSonElement*, const JSonElement *> LevenshteinMatrice_base::getEquivalences() const
  96. { return std::map<const JSonElement*, const JSonElement *>(); }
  97. /**
  98. * Normal matrice
  99. **/
  100. LevenshteinMatrice::LevenshteinMatrice()
  101. { }
  102. size_t LevenshteinMatrice::result() const
  103. { return levenDist; }
  104. bool LevenshteinMatrice::areSimilar() const
  105. { return levenRelativeDist > LEVENSHTEIN_SENSIBILITY; }
  106. const JSonElement *LevenshteinMatrice::getEquivalence(const JSonElement *e) const
  107. {
  108. std::map<const JSonElement *, const JSonElement *>::const_iterator it = equivalences.find(e);
  109. if (it == equivalences.cend())
  110. return nullptr;
  111. return (*it).second;
  112. }
  113. std::map<const JSonElement*, const JSonElement *> LevenshteinMatrice::getEquivalences() const
  114. { return equivalences; }
  115. /**
  116. * Manual matrice
  117. **/
  118. LevenshteinMatrice_manual *LevenshteinMatrice_manual::add(const JSonElement *a, eLevenshteinOperator b)
  119. {
  120. operations[a] = b;
  121. return this;
  122. }
  123. size_t LevenshteinMatrice_manual::result() const
  124. {
  125. return _result;
  126. }
  127. bool LevenshteinMatrice_manual::areSimilar() const
  128. {
  129. return false;
  130. }
  131. /**
  132. * Score matrice
  133. **/
  134. LevenshteinMatriceWithScore::LevenshteinMatriceWithScore(float s, const JSonElement *a, const JSonElement *b)
  135. {
  136. _result = s > LEVENSHTEIN_SENSIBILITY;
  137. if (_result)
  138. {
  139. equivalentA = a;
  140. equivalentB = b;
  141. }
  142. else
  143. equivalentA = equivalentB = nullptr;
  144. }
  145. const JSonElement * LevenshteinMatriceWithScore::getEquivalence(const JSonElement *a) const
  146. {
  147. if (equivalentA && equivalentB && equivalentA == a)
  148. return equivalentB;
  149. return nullptr;
  150. }
  151. std::map<const JSonElement *, const JSonElement *> LevenshteinMatriceWithScore::getEquivalences() const
  152. {
  153. std::map<const JSonElement*, const JSonElement *> res;
  154. if (equivalentA && equivalentB)
  155. res[equivalentA] = equivalentB;
  156. return res;
  157. }
  158. size_t LevenshteinMatriceWithScore::result() const
  159. {
  160. return _result ? 0 : 1;
  161. }
  162. bool LevenshteinMatriceWithScore::areSimilar() const
  163. {
  164. return _result;
  165. }