levenshtein.hh 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. #pragma once
  2. #include <string>
  3. #include <list>
  4. #include <limits.h>
  5. #include "jsonElement.hh"
  6. #include <iostream>
  7. float levenshteinPercent(const std::string &a, const std::string &b);
  8. template<class T> float levenshteinPercent(const std::list<T *> *a, const std::list<T *> *b);
  9. bool levenshteinCompare(const char &a, const char &b);
  10. bool levenshteinCompare(const JSonElement *a, const JSonElement *b);
  11. template<typename SIZE, class T, class SUBTYPE>
  12. static SIZE **_levenshteinMatrice(const T *a, const T *b, const size_t lenA, const size_t lenB)
  13. {
  14. size_t i, j;
  15. SIZE **matrice = new SIZE*[lenA +1]();
  16. matrice[0] = new SIZE[lenB +1]();
  17. for (j=0; j <= lenB; j++)
  18. matrice[0][j] = j;
  19. i = 1;
  20. for (SUBTYPE it: *a)
  21. {
  22. j =1;
  23. matrice[i] = new SIZE[lenB +1]();
  24. matrice[i][0] = i;
  25. for (SUBTYPE jt: *b)
  26. {
  27. matrice[i][j] = std::min(std::min(
  28. matrice[i -1][j] +1,
  29. matrice[i][j -1] +1),
  30. matrice[i -1][j -1] + ((levenshteinCompare(it, jt) > .9f) ? 0 : 1));
  31. j++;
  32. }
  33. i++;
  34. }
  35. for (size_t i=0; i <= lenA; ++i)
  36. {
  37. for (size_t j=0; j <= lenB; ++j)
  38. std::cerr << (size_t) matrice[i][j] << "\t";
  39. std::cerr << std::endl << "[";
  40. }
  41. return matrice;
  42. };
  43. template<typename SIZE, class T, class SUBTYPE>
  44. static SIZE _levenshteinPercent(const T *a, const T *b, const size_t lenA, const size_t lenB)
  45. {
  46. if (lenA == 0) return lenB;
  47. if (lenB == 0) return lenA;
  48. SIZE **matrice = _levenshteinMatrice<SIZE, T, SUBTYPE>(a, b, lenA, lenB);
  49. size_t i;
  50. const SIZE result = matrice[lenA][lenB];
  51. for (i=0; i < lenA; ++i)
  52. delete[] matrice[i];
  53. delete[] matrice;
  54. return 1 - (result / std::max(lenA, lenB));
  55. };
  56. template<class T> float levenshteinPercent(const std::list<T *> *a, const std::list<T *> *b)
  57. {
  58. const size_t lenA = a->size();
  59. const size_t lenB = b->size();
  60. if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
  61. return _levenshteinPercent<unsigned char, std::list<T *>, T *>(a, b, lenA, lenB);
  62. if (lenA < USHRT_MAX && lenB < USHRT_MAX)
  63. return _levenshteinPercent<unsigned short, std::list<T *>, T *>(a, b, lenA, lenB);
  64. return _levenshteinPercent<unsigned int, std::list<T *>, T *>(a, b, lenA, lenB);
  65. }
  66. enum ePath: char
  67. {
  68. add = '+',
  69. rem = '-',
  70. mod = '!',
  71. equ = '='
  72. };
  73. template<typename SIZE, class T, class SUBTYPE>
  74. static std::list<ePath> _levenshteinShortestPath(const T *a, const T *b, const size_t lenA, const size_t lenB)
  75. {
  76. std::list<ePath> result(std::max(lenA, lenB));
  77. if (lenA == 0 || lenB == 0)
  78. //TODO create deque<ePath>(std::max(lenA, lenB) populated with '-'
  79. ;
  80. SIZE **matrice = _levenshteinMatrice<SIZE, T, SUBTYPE>(a, b, lenA, lenB);
  81. size_t i;
  82. //TODO find shortest path
  83. // Clean matrice
  84. for (i=0; i < lenA; ++i)
  85. delete[] matrice[i];
  86. delete[] matrice;
  87. return result;
  88. };
  89. template<class T>
  90. std::list<ePath> levenshteinShortestPath(const std::list<T*> *a, const std::list<T *> *b)
  91. {
  92. const size_t lenA = a->size();
  93. const size_t lenB = b->size();
  94. if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
  95. return _levenshteinShortestPath<unsigned char, std::list<T *>, T *>(a, b, lenA, lenB);
  96. if (lenA < USHRT_MAX && lenB < USHRT_MAX)
  97. return _levenshteinShortestPath<unsigned short, std::list<T *>, T *>(a, b, lenA, lenB);
  98. return _levenshteinShortestPath<unsigned int, std::list<T *>, T *>(a, b, lenA, lenB);
  99. }