Przeglądaj źródła

Optimized, but takes more time o.o

isundil 8 lat temu
rodzic
commit
dfb4dc10b6
2 zmienionych plików z 169 dodań i 12 usunięć
  1. 25 12
      levenshtein.hpp
  2. 144 0
      test/levenshtest.cpp

+ 25 - 12
levenshtein.hpp

@@ -19,7 +19,7 @@
 
 #include <algorithm>
 #include <utility>
-#include <deque>
+#include <set>
 
 template <typename T>
 class LevenshteinPotencial
@@ -64,21 +64,21 @@ template <class T, typename SIZE=unsigned int>
 unsigned int levenshtein(const T &a, const T &b, const SIZE aSize, const SIZE bSize)
 {
     int **items = new int*[aSize]();
-    std::deque<LevenshteinPotencial<SIZE> > toProcess;
+    std::multiset<LevenshteinPotencial<SIZE> > toProcess;
 
     for (SIZE i =0; i < aSize; i++)
     {
         items[i] = new int[bSize]();
-        toProcess.push_back(LevenshteinPotencial<SIZE>(0, i, i));
+        toProcess.insert(LevenshteinPotencial<SIZE>(0, i, i));
 
         for (SIZE j=0; j < bSize; j++)
             items[i][j] = -1;
     }
     for (SIZE i =1; i < bSize; i++)
-        toProcess.push_back(LevenshteinPotencial<SIZE>(i, 0, i));
+        toProcess.insert(LevenshteinPotencial<SIZE>(i, 0, i));
     while (toProcess.size())
     {
-        auto currentIt = toProcess.cbegin();
+        const auto currentIt = toProcess.cbegin();
         const LevenshteinPotencial<SIZE> &current = *(currentIt);
 
         int add = levenshtein_get(items, current.coords.first -1, current.coords.second);
@@ -99,28 +99,41 @@ unsigned int levenshtein(const T &a, const T &b, const SIZE aSize, const SIZE bS
         }
         items[current.coords.first][current.coords.second] = min;
         if (current.coords.first == aSize -1 && current.coords.second == bSize -1)
-            return min;
+            break;
 
         //update toProcess
-        for (auto i = toProcess.begin(); i != toProcess.end(); i++)
+        add = rem = mod = -1;
+        for (auto i = toProcess.cbegin(); i != toProcess.cend(); i++)
         {
             if (*i == std::pair<SIZE, SIZE>(current.coords.first, current.coords.second +1))
+            {
+                add = (*i).minValue;
                 toProcess.erase(i);
+            }
             else if (*i == std::pair<SIZE, SIZE>(current.coords.first +1, current.coords.second))
+            {
+                rem = (*i).minValue;
                 toProcess.erase(i);
+            }
             else if (*i == std::pair<SIZE, SIZE>(current.coords.first +1, current.coords.second +1))
+            {
+                mod = (*i).minValue;
                 toProcess.erase(i);
+            }
         }
         if (current.coords.second +1 < bSize && items[current.coords.first][current.coords.second +1] == -1)
-            toProcess.push_back(LevenshteinPotencial<SIZE>(current.coords.first, current.coords.second +1, min +1));
+            toProcess.insert(LevenshteinPotencial<SIZE>(current.coords.first, current.coords.second +1, add == -1 ? min +1 : std::min(min +1, add)));
         if (current.coords.first +1 < aSize && items[current.coords.first +1][current.coords.second] == -1)
-            toProcess.push_back(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second, min +1));
+            toProcess.insert(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second, rem == -1 ? min +1 : std::min(min +1, rem)));
         if (current.coords.first +1 < aSize && current.coords.second +1 < bSize &&
                 items[current.coords.first +1][current.coords.second +1] == -1)
-            toProcess.push_back(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second +1, min));
+            toProcess.insert(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second +1, mod == -1 ? min : std::min(mod, min)));
         toProcess.erase(currentIt);
-        std::sort(toProcess.begin(), toProcess.end());
     }
-    return items[aSize -1][bSize -1];
+    const unsigned int levenshtein = items[aSize -1][bSize -1];
+    for (SIZE i =0; i < aSize; i++)
+        delete[] items[i];
+    delete[] items;
+    return levenshtein;
 }
 

+ 144 - 0
test/levenshtest.cpp

@@ -0,0 +1,144 @@
+#include "levenshtein.hpp"
+
+#define STRING_LEN 500
+#define TEST_COUNT 500
+
+#include <chrono>
+#include <iostream>
+#include <vector>
+
+/**
+ * Reference Levenshtein distance, used as the baseline the optimized
+ * levenshtein() is compared against.
+ *
+ * Classic dynamic-programming formulation: cell (i, j) is the edit distance
+ * between the first i elements of `a` and the first j elements of `b`.
+ * Only the previous and the current row are kept, so memory is O(bSize)
+ * and is released automatically by std::vector.
+ *
+ * @param a      first sequence; must support operator[] for [0, aSize)
+ * @param b      second sequence; must support operator[] for [0, bSize)
+ * @param aSize  number of elements of `a` to consider
+ * @param bSize  number of elements of `b` to consider
+ * @return the edit distance (insertions, deletions and substitutions each cost 1)
+ */
+template <class T, typename SIZE=unsigned int>
+int levenshtein_base(const T &a, const T &b, const SIZE aSize, const SIZE bSize)
+{
+    std::vector<int> previous(static_cast<std::size_t>(bSize) +1);
+    std::vector<int> current(previous.size());
+
+    // Row 0: turning an empty prefix of `a` into j elements of `b` costs j insertions.
+    for (std::size_t j =0; j < previous.size(); ++j)
+        previous[j] = static_cast<int>(j);
+    for (SIZE i =1; i <= aSize; ++i)
+    {
+        // Column 0: turning i elements of `a` into an empty prefix costs i deletions.
+        current[0] = static_cast<int>(i);
+        for (SIZE j =1; j <= bSize; ++j)
+        {
+            const int substitution = previous[j -1] + (a[i -1] == b[j -1] ? 0 : 1);
+
+            current[j] = std::min(std::min(
+                        current[j -1] +1,
+                        previous[j] +1),
+                        substitution);
+        }
+        // The row just computed becomes the "previous" row of the next iteration.
+        std::swap(previous, current);
+    }
+    return previous[bSize];
+}
+
+/**
+ * Write one uniform mismatch diagnostic to std::cerr.
+ *
+ * @param implementation  name of the function whose result was wrong
+ * @param line            source line of the failed check
+ * @param got             distance the implementation returned
+ * @param expected        distance the caller asked for
+ * @param a               first compared string
+ * @param b               second compared string
+ */
+static void reportMismatch(const char *implementation, const int line,
+        const unsigned int got, const unsigned int expected,
+        const std::string &a, const std::string &b)
+{
+    std::cerr << "Error: failed asserting levenshteinScore on file "
+        << __FILE__ << ":" << line
+        << " [" << implementation << "]"
+        << " (got " << got << ", expected " << expected << ") on "
+        << a << " vs " << b
+        << std::endl;
+}
+
+/**
+ * Check that both the optimized levenshtein() and the reference
+ * levenshtein_base() return `expected` for the pair (a, b).
+ *
+ * The optimized implementation is checked first; the reference one is only
+ * evaluated when the optimized result matched.
+ *
+ * @param a         first string
+ * @param b         second string
+ * @param expected  distance both implementations must return
+ * @return true when both results equal `expected`; false (after printing a
+ *         diagnostic to std::cerr) on the first mismatch
+ */
+bool simpleTest(const std::string &a, const std::string &b, unsigned int expected)
+{
+    const unsigned int optimizedScore = levenshtein(a, b, a.size(), b.size());
+
+    if (optimizedScore != expected)
+    {
+        reportMismatch("levenshtein", __LINE__, optimizedScore, expected, a, b);
+        return false;
+    }
+
+    // levenshtein_base() returns int; the comparison is done on unsigned values,
+    // exactly as the assignment to an unsigned variable did before.
+    const unsigned int referenceScore =
+        static_cast<unsigned int>(levenshtein_base(a, b, a.size(), b.size()));
+
+    if (referenceScore != expected)
+    {
+        reportMismatch("levenshtein_base", __LINE__, referenceScore, expected, a, b);
+        return false;
+    }
+    return true;
+}
+
+/**
+ * Draw one pseudo-random character from rand().
+ *
+ * @return a character whose code lies in [32, 125]
+ */
+char randomChar()
+{
+    const int offset = rand() % 94;
+
+    return offset + 32;
+}
+
+/**
+ * Build a string of STRING_LEN characters, each obtained from randomChar().
+ *
+ * @return the generated string
+ */
+std::string generateString()
+{
+    std::string result;
+    int remaining = STRING_LEN;
+
+    while (remaining-- > 0)
+        result.push_back(randomChar());
+    return result;
+}
+
+/**
+ * Time TEST_COUNT runs of the optimized levenshtein() followed by TEST_COUNT
+ * runs of the reference levenshtein_base() on the same pair of strings, and
+ * print the average duration of one call of each, in milliseconds.
+ *
+ * A monotonic sub-millisecond clock is used: time(NULL) only has one-second
+ * resolution, which is far too coarse for a per-call average.
+ *
+ * @param a  first string
+ * @param b  second string
+ */
+void speedTest(const std::string &a, const std::string &b)
+{
+    using Clock = std::chrono::steady_clock;
+    using Milliseconds = std::chrono::duration<float, std::milli>;
+
+    const Clock::time_point timeBefore = Clock::now();
+    for (unsigned int i =0; i < TEST_COUNT; ++i)
+        levenshtein(a, b, a.size(), b.size());
+    const Clock::time_point timeMid = Clock::now();
+    for (unsigned int i =0; i < TEST_COUNT; ++i)
+        levenshtein_base(a, b, a.size(), b.size());
+    const Clock::time_point timeEnd = Clock::now();
+
+    std::cout << "Processing items in "
+        << Milliseconds(timeMid -timeBefore).count() / TEST_COUNT
+        << "ms (optimized) vs "
+        << Milliseconds(timeEnd -timeMid).count() / TEST_COUNT
+        << "ms" << std::endl;
+}
+
+/**
+ * Benchmark on two identical generated strings (expected distance 0).
+ * The speed test only runs when simpleTest() accepts the pair.
+ */
+void speedTestEq()
+{
+    const std::string a = generateString();
+    const std::string b = a;
+
+    if (simpleTest(a, b, 0))
+        speedTest(a, b);
+}
+
+/**
+ * Benchmark on a generated string and a copy of it that had three characters
+ * erased and three characters inserted at random positions.
+ *
+ * Six edits only bound the distance from above: an erase and an insert that
+ * land on the same spot collapse into one substitution (or cancel out), so
+ * the expected value is taken from levenshtein_base() instead of being
+ * hard-coded. simpleTest() therefore effectively checks levenshtein()
+ * against the reference implementation here.
+ */
+void speedTestAddDel()
+{
+    const std::string a = generateString();
+    std::string b = a;
+
+    for (int i =0; i < 3; ++i)
+        b.erase(rand() % b.size(), 1);
+    for (int i =0; i < 3; ++i)
+    {
+        // Draw position and character in separate statements: both consume
+        // rand(), and the evaluation order of call arguments is unspecified.
+        const std::string::size_type position = rand() % b.size();
+        const char inserted = randomChar();
+
+        b.insert(position, 1, inserted);
+    }
+
+    const unsigned int expected =
+        static_cast<unsigned int>(levenshtein_base(a, b, a.size(), b.size()));
+
+    if (!simpleTest(a, b, expected))
+        return;
+    speedTest(a, b);
+}
+
+
+/**
+ * Benchmark on a generated string and a copy of it in which three randomly
+ * chosen positions were overwritten with random characters.
+ *
+ * Three overwrites only bound the distance from above: the same position may
+ * be picked twice, or the new character may equal the old one. The expected
+ * value is therefore taken from levenshtein_base() instead of being
+ * hard-coded, so simpleTest() effectively checks levenshtein() against the
+ * reference implementation here.
+ */
+void speedTestDiff()
+{
+    const std::string a = generateString();
+    std::string b = a;
+
+    for (int i =0; i < 3; ++i)
+    {
+        // Position first, character second, in separate statements so the
+        // sequence of rand() draws does not depend on the language standard.
+        const std::string::size_type position = rand() % b.size();
+
+        b[position] = randomChar();
+    }
+
+    const unsigned int expected =
+        static_cast<unsigned int>(levenshtein_base(a, b, a.size(), b.size()));
+
+    if (!simpleTest(a, b, expected))
+        return;
+    speedTest(a, b);
+}
+/**
+ * Test driver: seeds rand() with a fixed value, runs two fixed sanity checks
+ * (exiting with EXIT_FAILURE as soon as one fails), then the three speed
+ * tests, and exits with EXIT_SUCCESS.
+ */
+int main()
+{
+    srand(11);
+
+    // Short-circuit: the second check is skipped when the first one fails.
+    const bool sanityPassed =
+        simpleTest("qwerty123", "qwerty123", 0)
+        && simpleTest("abcdefghuijklmnop", "abcdefg0huijk0lmnop", 2);
+
+    if (!sanityPassed)
+        exit(EXIT_FAILURE);
+
+    std::cout << "starting eq speed test" << std::endl;
+    speedTestEq();
+
+    std::cout << "starting add/del speed test" << std::endl;
+    speedTestAddDel();
+
+    std::cout << "starting diff speed test" << std::endl;
+    speedTestDiff();
+
+    exit(EXIT_SUCCESS);
+}
+
+