isundil 9 жил өмнө
parent
commit
c2b922bee0
4 өөрчлөгдсөн 214 нэмэгдсэн , 0 устгасан
  1. 2 0
      .gitignore
  2. 22 0
      Makefile
  3. 46 0
      src/levenshtein.hpp
  4. 144 0
      test/levenshtest.cpp

+ 2 - 0
.gitignore

@@ -5,6 +5,8 @@
 *.o
 *.obj
 
+/test/levenshtest
+
 # Precompiled Headers
 *.gch
 *.pch

+ 22 - 0
Makefile

@@ -0,0 +1,22 @@
+
+TESTSRC=	test/levenshtest.cpp
+
+TESTOBJ=	$(TESTSRC:.cpp=.o)
+
+TARGET=		test/levenshtest
+
+CXXFLAGS=	-I ./src/ -g3
+
+$(TARGET):	$(TESTOBJ)
+	$(CXX)	$(TESTOBJ) -o $(TARGET)
+
+all:	$(TARGET)
+
+clean:
+	$(RM) $(TESTOBJ)
+
+fclean:	clean
+	$(RM) $(TARGET)
+
+re:	fclean all
+

+ 46 - 0
src/levenshtein.hpp

@@ -0,0 +1,46 @@
+#pragma once
+
+#include <utility>
+#include <queue>
+
+template<typename SIZE>
+class LevenshteinCoordinates
+{
+    public:
+        bool operator<(const LevenshteinCoordinates<SIZE> &other) const
+        {
+            return width < other.width;
+        }
+
+    private:
+        std::pair<SIZE, SIZE> coords;
+        unsigned int width;
+};
+
+template <class T, typename SIZE=unsigned int>
+int levenshtein(const T &a, const T &b, const SIZE aSize, const SIZE bSize)
+{
+    int **items = new int*[aSize +1]();
+    std::priority_queue<LevenshteinCoordinates<SIZE> > toProcess;
+
+    for (SIZE i =0; i <= aSize; i++)
+    {
+        items[i] = new int[bSize +1]();
+        items[i][0] = i;
+        if (i == 0)
+            for (SIZE j =1; j <= bSize; j++)
+                items[i][j] = j;
+        else
+            for (SIZE j =1; j <= bSize; j++)
+                items[i][j] = std::min(std::min(
+                            items[i][j -1] +1,
+                            items[i -1][j] +1),
+                            (items[i -1][j -1] + (a[i -1] == b[j -1] ? 0 : 1)));
+    }
+    const int levenshtein = items[aSize][bSize];
+    for (SIZE i =0; i < aSize +1; i++)
+        delete[] items[i];
+    delete[] items;
+    return levenshtein;
+}
+

+ 144 - 0
test/levenshtest.cpp

@@ -0,0 +1,144 @@
+#include "levenshtein.hpp"
+
+#define STRING_LEN 500
+#define TEST_COUNT 500
+
+#include <iostream>
+
+template <class T, typename SIZE=unsigned int>
+int levenshtein_base(const T &a, const T &b, const SIZE aSize, const SIZE bSize)
+{
+    int **items = new int*[aSize +1]();
+
+    for (SIZE i =0; i <= aSize; i++)
+    {
+        items[i] = new int[bSize +1]();
+        items[i][0] = i;
+        if (i == 0)
+            for (SIZE j =1; j <= bSize; j++)
+                items[i][j] = j;
+        else
+            for (SIZE j =1; j <= bSize; j++)
+                items[i][j] = std::min(std::min(
+                            items[i][j -1] +1,
+                            items[i -1][j] +1),
+                            (items[i -1][j -1] + (a[i -1] == b[j -1] ? 0 : 1)));
+    }
+    const int levenshtein = items[aSize][bSize];
+    for (SIZE i =0; i < aSize +1; i++)
+        delete[] items[i];
+    delete[] items;
+    return levenshtein;
+}
+
+bool simpleTest(const std::string &a, const std::string &b, unsigned int expected)
+{
+    unsigned int levenshteinScore;
+
+    if ((levenshteinScore = levenshtein(a, b, a.size(), b.size())) != expected)
+    {
+        std::cerr << "Error: failed asserting levenshteinScore on file "
+            << __FILE__ << ":" << __LINE__
+            << " (got " << levenshteinScore << ") on "
+            << a << " vs " << b
+            << std::endl;
+        return false;
+    }
+    if ((levenshteinScore = levenshtein_base(a, b, a.size(), b.size())) != expected)
+    {
+        std::cerr << "Error: failed asserting levenshteinScore on file "
+            << __FILE__ << ":" << __LINE__
+            << " (got " << levenshteinScore << ")"
+            << std::endl;
+        return false;
+    }
+    return true;
+}
+
+char randomChar()
+{
+    return (rand() % 94) + 32;
+}
+
+std::string generateString()
+{
+    std::string str;
+
+    for (int i =0; i < STRING_LEN; i++)
+        str += randomChar();
+    return str;
+}
+
+void speedTest(const std::string &a, const std::string &b)
+{
+    const unsigned int timeBefore = time(NULL);
+    for (unsigned int i =0; i < TEST_COUNT; ++i)
+        levenshtein(a, b, a.size(), b.size());
+    const unsigned int timeMid = time(NULL);
+    for (unsigned int i =0; i < TEST_COUNT; ++i)
+        levenshtein_base(a, b, a.size(), b.size());
+    const unsigned int timeEnd = time(NULL);
+
+    std::cout << "Processing items in "
+        << (float) (1000.f * (timeMid -timeBefore)) / TEST_COUNT
+        << "ms (optimized) vs "
+        << (float) (1000.f * (timeEnd -timeMid)) / TEST_COUNT
+        << "ms" << std::endl;
+}
+
+void speedTestEq()
+{
+    std::string a, b;
+
+    a = b = generateString();
+    if (!simpleTest(a, b, 0))
+        return;
+    speedTest(a, b);
+}
+
+void speedTestAddDel()
+{
+    std::string a, b;
+
+    a = generateString();
+    b = a;
+    b.erase(rand() % b.size(), 1);
+    b.erase(rand() % b.size(), 1);
+    b.erase(rand() % b.size(), 1);
+    b.insert(rand() % b.size(), std::string(1, randomChar()));
+    b.insert(rand() % b.size(), std::string(1, randomChar()));
+    b.insert(rand() % b.size(), std::string(1, randomChar()));
+    if (!simpleTest(a, b, 6))
+        return;
+    speedTest(a, b);
+}
+
+
+void speedTestDiff()
+{
+    std::string a, b;
+
+    a = b = generateString();
+    b[rand() % b.size()] = randomChar();
+    b[rand() % b.size()] = randomChar();
+    b[rand() % b.size()] = randomChar();
+    if (!simpleTest(a, b, 3))
+        return;
+    speedTest(a, b);
+}
+int main()
+{
+    srand(11);
+    if (!simpleTest("qwerty123", "qwerty123", 0))
+        exit(EXIT_FAILURE);
+    if (!simpleTest("abcdefghuijklmnop", "abcdefg0huijk0lmnop", 2))
+        exit(EXIT_FAILURE);
+    std::cout << "starting eq speed test" << std::endl;
+    speedTestEq();
+    std::cout << "starting add/del speed test" << std::endl;
+    speedTestAddDel();
+    std::cout << "starting diff speed test" << std::endl;
+    speedTestDiff();
+    exit(EXIT_SUCCESS);
+}
+