Browse Source

[add] optim

isundil 8 years ago
parent
commit
9a0d9f1e61
2 changed files with 80 additions and 48 deletions
  1. 80 35
      levenshtein.hpp
  2. 0 13
      test/test.cpp

+ 80 - 35
levenshtein.hpp

@@ -17,10 +17,39 @@
 
 #pragma once
 
+#include <algorithm>
 #include <utility>
-#include <queue>
+#include <deque>
 
-static unsigned int levenshtein_get(unsigned int * const *map, int px, int py)
+template <typename T> // T: type used for both the coordinates and the stored value
+class LevenshteinPotencial /* A pair of coordinates together with the value it was
+ * created with. levenshtein() keeps these in a deque that it re-sorts with
+ * operator<, so the entry that compares smallest comes first. */
+{
+    public:
+        LevenshteinPotencial(const T &px, const T &py, const T value): coords(std::pair<T, T>(px, py)), minValue(value) // stores (px, py) in coords and value in minValue
+        { }
+
+        unsigned int getTaxiCab() const // sum of the two coordinates
+        {
+            return coords.first +coords.second;
+        }
+
+        bool operator<(const LevenshteinPotencial &o) const // ascending minValue; ties broken by descending coordinate sum
+        {
+            if (minValue == o.minValue)
+                return getTaxiCab() > o.getTaxiCab(); // On a tie, the entry with the larger coordinate sum sorts first
+            return minValue < o.minValue;
+        }
+
+        bool operator==(const std::pair<T, T> &o) const // compares the coordinates only; minValue is ignored
+        {
+            return coords.first == o.first && coords.second == o.second;
+        }
+
+        std::pair<T, T> coords; // (first, second) = (px, py) as given to the constructor
+        T minValue; // the value given to the constructor
+};
+
+static int levenshtein_get(int **map, int px, int py)
 {
     if (px == -1 && py == -1)
         return 0;
@@ -32,50 +61,66 @@ static unsigned int levenshtein_get(unsigned int * const *map, int px, int py)
 }
 
 template <class T, typename SIZE=unsigned int>
-unsigned int levenshtein(const T *a, const T *b, const SIZE aSize, const SIZE bSize)
+unsigned int levenshtein(const T &a, const T &b, const SIZE aSize, const SIZE bSize) /* Evaluates cells of an
+ * aSize x bSize cost matrix in the order given by a sorted work list instead
+ * of filling every cell, and returns the value of cell (aSize -1, bSize -1)
+ * as soon as that cell has been evaluated.
+ *   a, b         - sequences read through operator[]; elements are compared with !=
+ *   aSize, bSize - used as the two matrix dimensions and as index bounds for a and b
+ * NOTE(review): there is no guard for aSize == 0 or bSize == 0; items[aSize -1][bSize -1]
+ * would then be indexed out of range. */
 {
-    unsigned int **items = new unsigned int*[aSize]();
+    int **items = new int*[aSize](); // NOTE(review): no delete[] for items remains in the added code, so each call leaks the matrix
+    std::deque<LevenshteinPotencial<SIZE> > toProcess; // work list of cells still to evaluate
 
     for (SIZE i =0; i < aSize; i++)
     {
-        items[i] = new unsigned int[bSize]();
-        for (SIZE j =0; j < bSize; j++)
-        {
-            unsigned int add = levenshtein_get(items, i, j -1) +1;
-            unsigned int del = levenshtein_get(items, i -1, j) +1;
-            unsigned int mod = levenshtein_get(items, i -1, j -1) +(a[i] == b[j] ? 0 : 1);
+        items[i] = new int[bSize]();
+        toProcess.push_back(LevenshteinPotencial<SIZE>(0, i, i)); // NOTE(review): queues (first=0, second=i) for i < aSize, yet cells are indexed items[first][second] with second bounded by bSize - looks transposed; confirm
 
-            items[i][j] = std::min(std::min(add, del), mod);
-        }
+        for (SIZE j=0; j < bSize; j++)
+            items[i][j] = -1; // -1 marks a cell that has not been evaluated yet
     }
-    const unsigned int levenshtein = items[aSize -1][bSize -1];
-    for (SIZE i =0; i < aSize; i++)
-        delete[] items[i];
-    delete[] items;
-    return levenshtein;
-}
+    for (SIZE i =1; i < bSize; i++)
+        toProcess.push_back(LevenshteinPotencial<SIZE>(i, 0, i)); // queues (first=i, second=0) for i in [1, bSize)
+    while (toProcess.size())
+    {
+        auto currentIt = toProcess.cbegin(); // front of the work list (re-sorted at the end of every iteration)
+        const LevenshteinPotencial<SIZE> &current = *(currentIt); // NOTE(review): reference into the deque; the erase() calls below can invalidate it while it is still in use
 
-template <class T, typename SIZE=unsigned int>
-unsigned int levenshtein(const T &a, const T &b, const SIZE aSize, const SIZE bSize)
-{
-    unsigned int **items = new unsigned int*[aSize]();
+        int add = levenshtein_get(items, current.coords.first -1, current.coords.second); // NOTE(review): coords are SIZE; with an unsigned SIZE, 0 -1 wraps and presumably relies on the conversion to int yielding -1
+        int rem = levenshtein_get(items, current.coords.first, current.coords.second -1);
+        int mod = levenshtein_get(items, current.coords.first -1, current.coords.second -1);
+        int min = -1; // stays -1 if none of the three neighbours has a value
 
-    for (SIZE i =0; i < aSize; i++)
-    {
-        items[i] = new unsigned int[bSize]();
-        for (SIZE j =0; j < bSize; j++)
+        // Compute weight: smallest of add +1, rem +1 and mod (+1 when the elements differ), skipping neighbours that are -1
+        if (add != -1)
+            min = add +1;
+        if (rem != -1)
+            min = min == -1 ? rem +1 : std::min(min, rem +1);
+        if (mod != -1)
         {
-            unsigned int add = levenshtein_get(items, i, j -1) +1;
-            unsigned int del = levenshtein_get(items, i -1, j) +1;
-            unsigned int mod = levenshtein_get(items, i -1, j -1) +(a[i] == b[j] ? 0 : 1);
+            if (a[current.coords.first] != b[current.coords.second])
+                mod++; // differing elements add 1 to the diagonal value
+            min = min == -1 ? mod : std::min(min, mod);
+        }
+        items[current.coords.first][current.coords.second] = min;
+        if (current.coords.first == aSize -1 && current.coords.second == bSize -1)
+            return min; // NOTE(review): returns without releasing items
 
-            items[i][j] = std::min(std::min(add, del), mod);
+        // Update toProcess: drop existing entries for the three following cells, then queue them again with values derived from min
+        for (auto i = toProcess.begin(); i != toProcess.end(); i++) // NOTE(review): erase(i) invalidates i, which is then incremented by the loop
+        {
+            if (*i == std::pair<SIZE, SIZE>(current.coords.first, current.coords.second +1))
+                toProcess.erase(i);
+            else if (*i == std::pair<SIZE, SIZE>(current.coords.first +1, current.coords.second))
+                toProcess.erase(i);
+            else if (*i == std::pair<SIZE, SIZE>(current.coords.first +1, current.coords.second +1))
+                toProcess.erase(i);
         }
+        if (current.coords.second +1 < bSize && items[current.coords.first][current.coords.second +1] == -1)
+            toProcess.push_back(LevenshteinPotencial<SIZE>(current.coords.first, current.coords.second +1, min +1));
+        if (current.coords.first +1 < aSize && items[current.coords.first +1][current.coords.second] == -1)
+            toProcess.push_back(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second, min +1));
+        if (current.coords.first +1 < aSize && current.coords.second +1 < bSize &&
+                items[current.coords.first +1][current.coords.second +1] == -1)
+            toProcess.push_back(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second +1, min));
+        toProcess.erase(currentIt); // NOTE(review): currentIt was obtained before the erase()/push_back() calls above, which invalidate deque iterators
+        std::sort(toProcess.begin(), toProcess.end()); // re-sort so the entry that compares smallest under operator< is at the front
     }
-    const unsigned int levenshtein = items[aSize -1][bSize -1];
-    for (SIZE i =0; i < aSize; i++)
-        delete[] items[i];
-    delete[] items;
-    return levenshtein;
+    return items[aSize -1][bSize -1]; // reached only if the work list empties before the last cell is evaluated
 }
 

+ 0 - 13
test/test.cpp

@@ -22,19 +22,6 @@
 
 int main()
 {
-    if (levenshtein("abcdef", "abcdef", 6, 6) != 0)
-        FAILED;
-    if (levenshtein("abcdef", "abcdf", 6, 5) != 1)
-        FAILED;
-    if (levenshtein("abcdf", "abcdef", 5, 6) != 1)
-        FAILED;
-    if (levenshtein("abcdf", "abcef", 5, 5) != 1)
-        FAILED;
-    if (levenshtein("abcf", "abdcf", 4, 5) != 1)
-        FAILED;
-    if (levenshtein("lorem", "liroem", 5, 6) != 2)
-        FAILED;
-
     if (levenshtein<std::string, int>("abcdef", "abcdef", 6, 6) != 0)
         FAILED;
     if (levenshtein<std::string, int>("abcdef", "abcdf", 6, 5) != 1)