Browse Source

[TMP BORKEN] refactored levenshteinCache into recursive levenshtein Matrice

B Thibault 9 years ago
parent
commit
02df267cbc

+ 1 - 1
include/curseSplitOutput.hh

@@ -80,7 +80,7 @@ class CurseSplitOutput: public CurseOutput
          * currently searching pattern and its results
         **/
         std::deque<std::list<const JSonElement*> > search_result;
-        std::map<const JSonElement *, eLevenshteinOperator> diffResult;
+        LevenshteinMatrice_base *diffMatrice;
 
         /**
          * Viewport start

+ 14 - 108
include/levenshtein.hpp

@@ -4,7 +4,7 @@
 #include <list>
 #include <limits.h>
 #include "jsonElement.hh"
-#include "levenshteinCache.hh"
+#include "levenshteinMatrice.hpp"
 
 #define LEVENSHTEIN_SENSIBILITY (0.7f)
 
@@ -16,26 +16,21 @@ bool levenshteinCompare(const char &a, const char &b);
 bool levenshteinCompare(const JSonElement *a, const JSonElement *b);
 
 template<typename SIZE, class ITERATOR, class SUBTYPE>
-static SIZE **_levenshteinMatrice(const ITERATOR &aBegin, const ITERATOR &aEnd, const ITERATOR &bBegin, const ITERATOR &bEnd, const size_t lenA, const size_t lenB)
+static LevenshteinMatrice<SIZE> *_levenshteinMatrice(const ITERATOR &aBegin, const ITERATOR &aEnd, const ITERATOR &bBegin, const ITERATOR &bEnd, const size_t lenA, const size_t lenB) // Fills the edit-distance matrix between [aBegin, aEnd) and [bBegin, bEnd); SIZE is the cell type, SUBTYPE is not used in the body. Returns a matrix allocated with `new` (the caller deletes it).
 {
     size_t i, j;
-    SIZE **matrice = new SIZE*[lenA +1]();
+    LevenshteinMatrice<SIZE> *matrice = new LevenshteinMatrice<SIZE>(lenA, lenB); // storage and border cells are set up by the LevenshteinMatrice constructor
     ITERATOR a = aBegin;
     ITERATOR b;
 
-    matrice[0] = new SIZE[lenB +1]();
-    for (j=0; j <= lenB; j++)
-        matrice[0][j] = j;
     for (i =1; a != aEnd; ++i, ++a)
     {
-        matrice[i] = new SIZE[lenB +1]();
-        matrice[i][0] = i;
         b = bBegin;
         for (j =1; b != bEnd; ++j, ++b)
-            matrice[i][j] = std::min(std::min(
-                    matrice[i -1][j] +1,
-                    matrice[i][j -1] +1),
-                    matrice[i -1][j -1] + ((levenshteinCompare(*a, *b) > LEVENSHTEIN_SENSIBILITY) ? 0 : 1));
+            matrice->set(i, j, std::min(std::min( // cell (i, j) = cheapest of the three neighbouring moves
+                    matrice->get(i -1, j) +1, // step from the row above, cost 1
+                    matrice->get(i, j -1) +1), // step from the column to the left, cost 1
+                    matrice->get(i -1, j -1) + ((levenshteinCompare(*a, *b) > LEVENSHTEIN_SENSIBILITY) ? 0 : 1))); // diagonal step: cost 0 when levenshteinCompare(*a, *b) exceeds LEVENSHTEIN_SENSIBILITY, else 1. NOTE(review): the first iteration (i == j == 1) reads cell (0, 0), so the constructor must have initialised it.
     }
     return matrice;
 };
@@ -54,13 +49,10 @@ static float _levenshteinPercent(ITERATOR aBegin, ITERATOR aEnd, ITERATOR bBegin
     if (!lenA && !lenB) return 1.f;
     if (!lenA) return (float) lenB / maxSize;
     if (!lenB) return (float) lenA / maxSize;
-    SIZE **matrice = _levenshteinMatrice<SIZE, ITERATOR, SUBTYPE>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
-    size_t i;
-    const SIZE result = matrice[lenA][lenB];
+    LevenshteinMatrice<SIZE> *matrice = _levenshteinMatrice<SIZE, ITERATOR, SUBTYPE>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
+    const SIZE result = matrice->result();
 
-    for (i=0; i < lenA; ++i)
-        delete[] matrice[i];
-    delete[] matrice;
+    delete matrice;
     return 1 - ((float)result / maxSize);
 };
 
@@ -80,102 +72,16 @@ template<class T> float levenshteinPercent(const std::list<T *> *a, const std::l
     return _levenshteinPercent<unsigned int, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
 }
 
-template<typename SIZE, class ITERATOR, class SUBTYPE>
-static size_t _levenshteinShortestPath(std::list<eLevenshteinOperator> &result, ITERATOR aBegin, ITERATOR aEnd, ITERATOR bBegin, ITERATOR bEnd, size_t lenA, size_t lenB)
-{
-    const size_t initLenA = lenA;
-    const size_t initLenB = lenB;
-    result.clear();
-
-    while (aBegin != aEnd && bBegin != bEnd && levenshteinCompare(*aBegin, *bBegin))
-    {
-        aBegin++;
-        bBegin++;
-        lenA--;
-        lenB--;
-    }
-    if (!lenA && !lenB)
-    {
-        for (size_t i=0; i < initLenA; ++i)
-            result.push_back(eLevenshteinOperator::equ);
-        return 0;
-    }
-    else if (!lenA)
-    {
-        size_t i;
-        for (i=0; i < initLenB - lenB; ++i)
-            result.push_back(eLevenshteinOperator::equ);
-        for (; i < initLenB; ++i)
-            result.push_back(eLevenshteinOperator::rem);
-        return lenB;
-    }
-    else if (!lenB)
-    {
-        size_t i;
-        for (i=0; i < initLenA - lenA; ++i)
-            result.push_back(eLevenshteinOperator::equ);
-        for (; i < initLenA; ++i)
-            result.push_back(eLevenshteinOperator::add);
-        return lenA;
-    }
-    SIZE **matrice = _levenshteinMatrice<SIZE, ITERATOR, SUBTYPE>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
-    size_t i = lenA;
-    size_t j = lenB;
-    const size_t levenDist = matrice[i][j];
-
-    while (i || j)
-    {
-        if (i && (!j || matrice[i][j] > matrice[i-1][j]))
-        {
-            result.push_front(eLevenshteinOperator::add);
-            --i;
-        }
-        else if (j && (!i || matrice[i][j] > matrice[i][j -1]))
-        {
-            result.push_front(eLevenshteinOperator::rem);
-            --j;
-        }
-        else if (i && j)
-        {
-            result.push_front(matrice[i][j] == matrice[i-1][j-1] ? eLevenshteinOperator::equ : eLevenshteinOperator::mod);
-            --i;
-            --j;
-        }
-        else if (i)
-        {
-            result.push_front(eLevenshteinOperator::add);
-            --i;
-        }
-        else if (j)
-        {
-            result.push_front(eLevenshteinOperator::rem);
-            --j;
-        }
-    }
-
-    for (i = initLenA - lenA; i; --i)
-        result.push_front(eLevenshteinOperator::equ);
-
-    //TODO
-    LevenshteinCache<JSonElement *>::instance();
-
-    // Clean matrice
-    for (i=0; i < lenA +1; ++i)
-        delete[] matrice[i];
-    delete[] matrice;
-    return levenDist;
-};
-
 template<class T>
-size_t levenshteinShortestPath(std::list<eLevenshteinOperator> &result, const std::list<T*> *a, const std::list<T *> *b)
+LevenshteinMatrice_base *levenshteinShortestPath(const std::list<T*> *a, const std::list<T *> *b) // Builds the Levenshtein matrix of the two lists and returns it through the non-template base type. The matrix is allocated with `new` inside _levenshteinMatrice, so the caller must delete it. Despite the name, no path is extracted here.
 {
     const size_t lenA = a->size();
     const size_t lenB = b->size();
 
     if (lenA < UCHAR_MAX && lenB < UCHAR_MAX) // pick the narrowest unsigned cell type that can hold both lengths
-        return _levenshteinShortestPath<unsigned char, typename std::list<T *>::const_iterator, T *>(result, a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
+        return _levenshteinMatrice<unsigned char, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
     if (lenA < USHRT_MAX && lenB < USHRT_MAX)
-        return _levenshteinShortestPath<unsigned short, typename std::list<T *>::const_iterator, T *>(result, a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
-    return _levenshteinShortestPath<unsigned int, typename std::list<T *>::const_iterator, T *>(result, a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
+        return _levenshteinMatrice<unsigned short, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
+    return _levenshteinMatrice<unsigned int, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB); // fallback for lists too long for unsigned short cells
 }
 

+ 0 - 43
include/levenshteinCache.hh

@@ -1,43 +0,0 @@
-#pragma once
-
-#include <map>
-#include "levenshtein.hpp"
-
-enum eLevenshteinOperator: char
-{
-    add = '+',
-    rem = '-',
-    mod = '!',
-    equ = '='
-};
-
-template<class T> class LevenshteinCache
-{
-    public:
-        ~LevenshteinCache()
-        {}
-
-        void push(const T key, const eLevenshteinOperator &value)
-        {
-            cache[key] = value;
-        }
-
-    public:
-        static LevenshteinCache<T> *instance()
-        {
-            if (LevenshteinCache<JSonElement *>::_instance)
-                return LevenshteinCache<JSonElement *>::_instance;
-            return LevenshteinCache<JSonElement *>::_instance = new LevenshteinCache<JSonElement *>();
-        }
-
-    private:
-        LevenshteinCache()
-        { }
-
-    private:
-        std::map<const T, eLevenshteinOperator> cache;
-
-    private:
-        static LevenshteinCache<T> *_instance;
-};
-

+ 129 - 0
include/levenshteinMatrice.hpp

@@ -0,0 +1,129 @@
+#pragma once
+
+#include "levenshtein.hpp"
+
+enum eLevenshteinOperator: char // One step of an edit path as produced by LevenshteinMatrice::shortestPath; the underlying value is a printable character.
+{
+    add = '+', // emitted when the path steps back one row (first index)
+    rem = '-', // emitted when the path steps back one column (second index)
+    mod = '!', // diagonal step whose cell value differs from the diagonal neighbour
+    equ = '=' // diagonal step whose cell value equals the diagonal neighbour
+};
+
+class LevenshteinMatrice_base // Non-template base so a LevenshteinMatrice<T> can be stored and deleted without knowing its cell type T.
+{
+    public:
+        virtual ~LevenshteinMatrice_base() {} // virtual so that `delete` through a base pointer runs the derived destructor
+};
+
+template<typename T> // T: integer type used for every distance cell
+class LevenshteinMatrice: public LevenshteinMatrice_base // (n+1) x (m+1) edit-distance table plus one optional owned sub-matrix pointer per cell
+{
+    public:
+        LevenshteinMatrice(size_t n, size_t m) // Allocates the table and seeds row 0 with 0..m and column 0 with 0..n; inner cells stay unset until set() is called.
+        {
+            this->n = n;
+            this->m = m;
+            this->matrice = new T*[n +1]();
+            this->subMatrice = new LevenshteinMatrice_base**[n +1]();
+
+            matrice[0] = new T[m +1]; // no initializer: cells are indeterminate until assigned below
+            for (size_t i =0; i <= m; ++i) // starts at 0 so that cell (0, 0) is written too; it is the first cell read when the table is filled
+                matrice[0][i] = i;
+
+            for (size_t i=1; i <= n; ++i)
+            {
+                matrice[i] = new T[m +1];
+                matrice[i][0] = i;
+            }
+
+            for (size_t i=0; i <= n; ++i)
+            {
+                subMatrice[i] = new LevenshteinMatrice_base*[m +1];
+                for (size_t j=0; j <= m; ++j)
+                    subMatrice[i][j] = nullptr; // no sub-matrix attached yet
+            }
+        };
+
+        ~LevenshteinMatrice() // Frees every row and every non-null sub-matrix: pointers handed to set() are owned by this object.
+        {
+            for (size_t i=0; i <= n; ++i)
+            {
+                delete []matrice[i];
+                for (size_t j=0; j <= m; ++j)
+                    if (subMatrice[i][j])
+                        delete subMatrice[i][j];
+                delete []subMatrice[i];
+            }
+            delete []matrice;
+            delete []subMatrice;
+        };
+
+        void prune() // placeholder, currently a no-op
+        {
+            //TODO
+        }
+
+        T get(size_t a, size_t b) const // Returns cell (a, b); indices are not bounds-checked.
+        {
+            return matrice[a][b];
+        };
+
+        void set(size_t a, size_t b, T value, LevenshteinMatrice_base *subMatrice =nullptr) // Stores `value` in cell (a, b) and replaces that cell's sub-matrix pointer.
+        {
+            matrice[a][b] = value;
+            this->subMatrice[a][b] = subMatrice; // NOTE(review): a previously stored pointer is overwritten without being deleted
+        };
+
+        std::list<eLevenshteinOperator> shortestPath() const // Walks back from cell (n, m) to (0, 0) and returns the edit operations in forward order; sub-matrices are not consulted.
+        {
+            std::list<eLevenshteinOperator> result;
+
+            size_t i = n;
+            size_t j = m;
+
+            while (i || j)
+            {
+                if (i && (!j || matrice[i][j] > matrice[i-1][j])) // column 0 reached, or the cell above is strictly cheaper
+                {
+                    result.push_front(eLevenshteinOperator::add);
+                    --i;
+                }
+                else if (j && (!i || matrice[i][j] > matrice[i][j -1])) // row 0 reached, or the cell to the left is strictly cheaper
+                {
+                    result.push_front(eLevenshteinOperator::rem);
+                    --j;
+                }
+                else if (i && j) // diagonal step: same value means the elements matched, otherwise a substitution
+                {
+                    result.push_front(matrice[i][j] == matrice[i-1][j-1] ? eLevenshteinOperator::equ : eLevenshteinOperator::mod);
+                    --i;
+                    --j;
+                }
+                else if (i) // never reached: i != 0 with j == 0 is already handled by the first branch
+                {
+                    result.push_front(eLevenshteinOperator::add);
+                    --i;
+                }
+                else if (j) // never reached: j != 0 with i == 0 is already handled by the second branch
+                {
+                    result.push_front(eLevenshteinOperator::rem);
+                    --j;
+                }
+            }
+            return result;
+        }
+
+        T result() const // Returns the bottom-right cell (n, m), i.e. the total edit distance once the table is filled.
+        {
+            return matrice[n][m];
+        };
+
+    private: // NOTE(review): raw owning pointers with no copy constructor/assignment declared; copying an instance would double-delete
+        T **matrice; // n+1 rows of m+1 distance cells
+        LevenshteinMatrice_base ***subMatrice; // same shape as matrice; nullptr where no sub-matrix is attached
+
+        size_t n; // last valid row index
+        size_t m; // last valid column index
+};
+

+ 10 - 24
src/curseSplitOutput.cpp

@@ -17,12 +17,18 @@
 
 CurseSplitOutput::CurseSplitOutput(const Params &p): CurseOutput(p) // Forwards the parameters to CurseOutput, then runs init().
 {
+    diffMatrice = nullptr; // no diff matrix yet; the destructor only deletes a non-null pointer
     init();
 }
 
 CurseSplitOutput::~CurseSplitOutput() // Runs shutdown(), then releases the diff matrix if one is held.
 {
     shutdown();
+    if (diffMatrice)
+    {
+        delete diffMatrice; // deleted through LevenshteinMatrice_base, whose destructor is virtual
+        diffMatrice = nullptr;
+    }
 }
 
 void CurseSplitOutput::run(const std::deque<std::string> &inputName, const std::deque<JSonElement*> &roots)
@@ -85,29 +91,7 @@ void CurseSplitOutput::computeDiff()
     }
     else
     {
-        std::list<eLevenshteinOperator> diffList;
-        levenshteinShortestPath<JSonElement>(diffList, a, b);
-
-        JSonContainer::const_iterator it = a->cbegin();
-        for (eLevenshteinOperator i : diffList)
-            if (it != a->cend() &&
-                    (i == eLevenshteinOperator::equ ||
-                    i == eLevenshteinOperator::mod ||
-                    i == eLevenshteinOperator::add))
-            {
-                diffResult[*it] = i;
-                it++;
-            }
-        it = b->cbegin();
-        for (eLevenshteinOperator i : diffList)
-            if (it != b->cend() &&
-                    (i == eLevenshteinOperator::equ ||
-                    i == eLevenshteinOperator::mod ||
-                    i == eLevenshteinOperator::rem))
-            {
-                diffResult[*it] = i == eLevenshteinOperator::rem ? eLevenshteinOperator::add : i;
-                it++;
-            }
+        diffMatrice = levenshteinShortestPath<JSonElement>(a, b);
     }
 }
 
@@ -665,11 +649,13 @@ const OutputFlag CurseSplitOutput::getFlag(const JSonElement *item, const JSonEl
     res.searched(std::find(search_result[selectedWin].cbegin(), search_result[selectedWin].cend(), item) != search_result[selectedWin].cend());
 
     try {
-        eLevenshteinOperator dr = diffResult.at(item);
+        /*
+        eLevenshteinOperator dr = LevenshteinCache<JSonElement>::instance()->get(item);
         if (dr == eLevenshteinOperator::add)
             res.type(OutputFlag::TYPE_STRING);
         else if (dr == eLevenshteinOperator::mod)
             res.type(OutputFlag::TYPE_NUMBER);
+        */
     }
     catch (std::out_of_range &e) {}
     /*

+ 0 - 3
src/levenshtein.cpp

@@ -33,6 +33,3 @@ bool levenshteinStrictCompare(const JSonElement *a, const JSonElement *b)
     return *a == b;
 }
 
-template<>
-LevenshteinCache<JSonElement *> *LevenshteinCache<JSonElement *>::_instance = nullptr;
-