1
1

levenshtein.hpp 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. #pragma once
  2. #include <string>
  3. #include <list>
  4. #include <limits.h>
  5. #include "jsonElement.hh"
  6. #include "levenshteinMatrice.hpp"
  7. #define LEVENSHTEIN_SENSIBILITY (0.7f)
  8. float levenshteinPercent(const std::string &a, const std::string &b);
  9. template<class T> float levenshteinPercent(const std::list<T *> *a, const std::list<T *> *b);
  10. bool levenshteinStrictCompare(const char &a, const char &b);
  11. bool levenshteinStrictCompare(const JSonElement *a, const JSonElement *b);
  12. bool levenshteinCompare(const char &a, const char &b);
  13. bool levenshteinCompare(const JSonElement *a, const JSonElement *b);
  14. template<typename SIZE, class ITERATOR, class SUBTYPE>
  15. static LevenshteinMatrice<SIZE> *_levenshteinMatrice(const ITERATOR &aBegin, const ITERATOR &aEnd, const ITERATOR &bBegin, const ITERATOR &bEnd, const size_t lenA, const size_t lenB)
  16. {
  17. size_t i, j;
  18. LevenshteinMatrice<SIZE> *matrice = new LevenshteinMatrice<SIZE>(lenA, lenB);
  19. ITERATOR a = aBegin;
  20. ITERATOR b;
  21. for (i =1; a != aEnd; ++i, ++a)
  22. {
  23. b = bBegin;
  24. for (j =1; b != bEnd; ++j, ++b)
  25. matrice->set(i, j, std::min(std::min(
  26. matrice->get(i -1, j) +1,
  27. matrice->get(i, j -1) +1),
  28. matrice->get(i -1, j -1) + ((levenshteinCompare(*a, *b) > LEVENSHTEIN_SENSIBILITY) ? 0 : 1)));
  29. }
  30. return matrice;
  31. };
  32. template<typename SIZE, typename ITERATOR, class SUBTYPE>
  33. static float _levenshteinPercent(ITERATOR aBegin, ITERATOR aEnd, ITERATOR bBegin, ITERATOR bEnd, size_t lenA, size_t lenB)
  34. {
  35. const size_t maxSize = std::max(lenA, lenB);
  36. while (aBegin != aEnd && bBegin != bEnd && levenshteinCompare(*aBegin, *bBegin))
  37. {
  38. aBegin++;
  39. bBegin++;
  40. lenA--;
  41. lenB--;
  42. }
  43. if (!lenA && !lenB) return 1.f;
  44. if (!lenA) return (float) lenB / maxSize;
  45. if (!lenB) return (float) lenA / maxSize;
  46. LevenshteinMatrice<SIZE> *matrice = _levenshteinMatrice<SIZE, ITERATOR, SUBTYPE>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
  47. const SIZE result = matrice->result();
  48. delete matrice;
  49. return 1 - ((float)result / maxSize);
  50. };
  51. template<class T> float levenshteinPercent(const std::list<T *> *a, const std::list<T *> *b)
  52. {
  53. const size_t lenA = a->size();
  54. const size_t lenB = b->size();
  55. typename std::list<T*>::const_iterator aBegin = a->cbegin();
  56. typename std::list<T*>::const_iterator aEnd = a->cend();
  57. typename std::list<T*>::const_iterator bBegin = b->cbegin();
  58. typename std::list<T*>::const_iterator bEnd = b->cend();
  59. if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
  60. return _levenshteinPercent<unsigned char, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
  61. if (lenA < USHRT_MAX && lenB < USHRT_MAX)
  62. return _levenshteinPercent<unsigned short, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
  63. return _levenshteinPercent<unsigned int, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
  64. }
  65. template<class T>
  66. LevenshteinMatrice_base *levenshteinShortestPath(const std::list<T*> *a, const std::list<T *> *b)
  67. {
  68. const size_t lenA = a->size();
  69. const size_t lenB = b->size();
  70. if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
  71. return _levenshteinMatrice<unsigned char, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
  72. if (lenA < USHRT_MAX && lenB < USHRT_MAX)
  73. return _levenshteinMatrice<unsigned short, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
  74. return _levenshteinMatrice<unsigned int, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
  75. }