Pārlūkot izejas kodu

[minor] removed debug
[refactor] LevenshteinMatrice: moved template to builder
[bugfix] removed memory leak or bad memory usage
[add] propagate eLevenshteinOperator from sub-matrices to parents on equ/mod operations
[removed] removed eLevenshteinOperator from operation list (missing elements do not exist)
[quickfix] added weight to missing containers

B Thibault 9 gadi atpakaļ
vecāks
revīzija
f23590b7b6

+ 72 - 91
include/levenshteinMatrice.hpp

@@ -18,11 +18,11 @@ class LevenshteinMatrice_base
     public:
         virtual ~LevenshteinMatrice_base() {}
 
-        const std::map<const JSonElement*, eLevenshteinOperator> path() const;
+        virtual const std::map<const JSonElement*, eLevenshteinOperator> path() const;
         virtual size_t result() const =0;
         virtual bool areSimilar() const =0;
 
-        virtual void debug(std::ostream &out) const =0;
+        eLevenshteinOperator get(const JSonElement *) const;
 
     public:
         class Builder
@@ -45,8 +45,6 @@ class LevenshteinMatrice_manual: public LevenshteinMatrice_base
         size_t result() const;
         bool areSimilar() const;
 
-        void debug(std::ostream &out) const;
-
     public:
         size_t _result;
 };
@@ -57,163 +55,146 @@ class LevenshteinMatriceWithScore: public LevenshteinMatrice_base
         LevenshteinMatriceWithScore(float score);
 
         size_t result() const;
-        void debug(std::ostream &out) const;
         bool areSimilar() const;
 
     private:
         bool _result;
 };
 
-template<typename T>
 class LevenshteinMatrice: public LevenshteinMatrice_base
 {
     public:
-        LevenshteinMatrice(const JSonContainer::const_iterator aBegin, const JSonContainer::const_iterator aEnd,
-                const JSonContainer::const_iterator bBegin, const JSonContainer::const_iterator bEnd,
-                size_t n, size_t m)
+        template<typename T>
+        static LevenshteinMatrice *build(const JSonContainer::const_iterator aBegin, const JSonContainer::const_iterator bBegin,
+                const size_t n, const size_t m)
         {
+            LevenshteinMatrice *result = new LevenshteinMatrice();
             size_t i, j;
             JSonContainer::const_iterator a = aBegin;
             JSonContainer::const_iterator b;
             LevenshteinMatrice_base::Builder matriceBuilder;
 
-            this->n = n;
-            this->m = m;
-            this->matrice = new T*[n +1]();
-            this->subMatrice = new LevenshteinMatrice_base**[n +1]();
+            T **matrice = new T*[n +1]();
+            LevenshteinMatrice_base ***subMatrice = new LevenshteinMatrice_base**[n]();
 
             matrice[0] = new T[m +1];
-            for (i =1; i <= m; ++i)
+            for (i =0; i <= m; ++i)
                 matrice[0][i] = i;
 
             for (i=1; i <= n; ++i)
             {
                 matrice[i] = new T[m +1];
                 matrice[i][0] = i;
+                subMatrice[i -1] = new LevenshteinMatrice_base*[m];
+                for (size_t j=0; j < m; ++j)
+                    subMatrice[i -1][j] = nullptr;
             }
 
-            for (i=0; i <= n; ++i)
-            {
-                subMatrice[i] = new LevenshteinMatrice_base*[m +1];
-                for (size_t j=0; j <= m; ++j)
-                    subMatrice[i][j] = nullptr;
-            }
-
-            for (i =0; a != aEnd; ++i, ++a)
+            for (i =1; i <= n; ++i, ++a)
             {
                 b = bBegin;
-                for (j =0; b != bEnd; ++j, ++b)
+                for (j =1; j <= m; ++j, ++b)
                 {
-                    LevenshteinMatrice_base *subMatrice = matriceBuilder.build(*a, *b);
-                    if (subMatrice != nullptr)
+                    LevenshteinMatrice_base *_subMatrice = matriceBuilder.build(*a, *b);
+                    if (_subMatrice != nullptr)
                     {
-                        const T chCost = get(i, j) + (subMatrice->areSimilar() ? 0 : 1);
+                        const T chCost = matrice[i -1][j -1] + (_subMatrice->areSimilar() ? 0 : 1);
 
-                        if (chCost <= get(i, j +1) +1 && chCost <= get(i +1, j))
+                        if (chCost <= matrice[i -1][j] +1 &&
+                                chCost <= matrice[i][j -1] +1)
                         {
-                            matrice[i +1][j +1] = chCost;
-                            this->subMatrice[i +1][j +1] = subMatrice;
+                            matrice[i][j] = chCost;
+                            subMatrice[i -1][j -1] = _subMatrice;
                             continue;
                         }
-                        delete subMatrice;
+                        delete _subMatrice;
                     } // Change is not worth or subMatrice is null (eg. a and b has different types)
-                    matrice[i +1][j +1] = std::min(get(i, j +1), get(i +1, j)) +1;
+                    matrice[i][j] = std::min(matrice[i -1][j], matrice[i][j -1]) +1;
                 }
             }
+
+            result->levenDist = matrice[n][m];
+            result->levenRelativeDist = 1 -(matrice[n][m] / std::max(n, m));
+            result->shortestPath<T>(matrice, subMatrice, n, m, --a, --b);
+            cleanMatrice(matrice, subMatrice, n, m);
+            return result;
         };
 
-        ~LevenshteinMatrice()
+        template<typename T>
+        static void cleanMatrice(T **matrice, LevenshteinMatrice_base ***subMatrice, const size_t &n, const size_t &m)
         {
             for (size_t i=0; i <= n; ++i)
             {
                 delete []matrice[i];
-                for (size_t j=0; j <= m; ++j)
-                    if (subMatrice[i][j])
-                        delete subMatrice[i][j];
-                delete []subMatrice[i];
+                if (i != n)
+                {
+                    for (size_t j=0; j < m; ++j)
+                        if (subMatrice[i][j])
+                            delete subMatrice[i][j];
+                    delete []subMatrice[i];
+                }
             }
             delete []matrice;
             delete []subMatrice;
         };
 
-        void prune()
-        {
-            //TODO
-        }
-
-        T get(size_t a, size_t b) const
-        {
-            return matrice[a][b];
-        };
+        size_t result() const;
+        bool areSimilar() const;
 
-        std::list<eLevenshteinOperator> shortestPath() const
+    private:
+        template<typename T>
+        void shortestPath(T **matrice,
+                LevenshteinMatrice_base ***subMatrice,
+                size_t _i, size_t _j,
+                JSonContainer::const_iterator i, JSonContainer::const_iterator j)
         {
-            std::list<eLevenshteinOperator> result;
-
-            size_t i = n;
-            size_t j = m;
-
-            while (i || j)
+            while (_i || _j)
             {
-                if (i && (!j || matrice[i][j] > matrice[i-1][j]))
+                if (_i && (!_j || matrice[_i][_j] > matrice[_i-1][_j]))
                 {
-                    result.push_front(eLevenshteinOperator::add);
+                    operations[*i] = eLevenshteinOperator::add;
                     --i;
+                    --_i;
                 }
-                else if (j && (!i || matrice[i][j] > matrice[i][j -1]))
+                else if (_j && (!_i || matrice[_i][_j] > matrice[_i][_j -1]))
                 {
-                    result.push_front(eLevenshteinOperator::rem);
+                    operations[*j] = eLevenshteinOperator::add;
                     --j;
+                    --_j;
                 }
-                else if (i && j)
+                else if (_i && _j)
                 {
-                    result.push_front(matrice[i][j] == matrice[i-1][j-1] ? eLevenshteinOperator::equ : eLevenshteinOperator::mod);
+                    eLevenshteinOperator op =
+                        matrice[_i][_j] == matrice[_i -1][_j -1] ?
+                        eLevenshteinOperator::equ :
+                        eLevenshteinOperator::mod;
+                    operations[*i] = operations[*j] = op;
+                    for (std::pair<const JSonElement *, eLevenshteinOperator> e : subMatrice[_i -1][_j -1]->path())
+                        operations[e.first] = e.second;
                     --i;
                     --j;
+                    --_i;
+                    --_j;
                 }
-                else if (i)
+                else if (_i)
                 {
-                    result.push_front(eLevenshteinOperator::add);
+                    operations[*i] = eLevenshteinOperator::add;
                     --i;
+                    --_i;
                 }
-                else if (j)
+                else if (_j)
                 {
-                    result.push_front(eLevenshteinOperator::rem);
+                    operations[*j] = eLevenshteinOperator::add;
                     --j;
+                    --_j;
                 }
             }
-            return result;
-        }
-
-        void debug(std::ostream &o) const
-        {
-            for (size_t i =0; i <= n; ++i)
-            {
-                for (size_t j=0; j <= m; ++j)
-                    o << (int) (matrice[n][m]) << '\t';
-                o << std::endl;
-            }
         }
 
-        size_t result() const
-        {
-            return (size_t) matrice[n][m];
-        };
-
-        bool areSimilar() const
-        {
-            float levenRelativeDist = 1 -(result() / std::max(n, m));
-            return levenRelativeDist > LEVENSHTEIN_SENSIBILITY;
-        }
 
     private:
-        T **matrice;
-        /**
-         * Usefull only on `modify' operation
-        **/
-        LevenshteinMatrice_base ***subMatrice;
-
-        size_t n;
-        size_t m;
+        LevenshteinMatrice();
+        size_t levenDist;
+        float levenRelativeDist;
 };
 

+ 5 - 6
src/curseSplitOutput.cpp

@@ -81,7 +81,6 @@ void CurseSplitOutput::computeDiff()
         diffMatrice = builder.build(roots.at(0), roots.at(1));
     else if (roots.size() == 3)
         throw std::runtime_error("3-input diff not implemented"); //TODO
-    diffMatrice->debug(std::cout);
 }
 
 inputResult CurseSplitOutput::selectUp()
@@ -638,13 +637,13 @@ const OutputFlag CurseSplitOutput::getFlag(const JSonElement *item, const JSonEl
     res.searched(std::find(search_result[selectedWin].cbegin(), search_result[selectedWin].cend(), item) != search_result[selectedWin].cend());
 
     try {
-        /*
-        eLevenshteinOperator dr = LevenshteinCache<JSonElement>::instance()->get(item);
+        eLevenshteinOperator dr = diffMatrice->get(item);
         if (dr == eLevenshteinOperator::add)
-            res.type(OutputFlag::TYPE_STRING);
-        else if (dr == eLevenshteinOperator::mod)
             res.type(OutputFlag::TYPE_NUMBER);
-        */
+        else if (dr == eLevenshteinOperator::rem)
+            res.type(OutputFlag::TYPE_BOOL);
+        else if (dr == eLevenshteinOperator::mod)
+            res.type(OutputFlag::TYPE_STRING);
     }
     catch (std::out_of_range &e) {}
     /*

+ 28 - 17
src/levenshtein.cpp

@@ -53,27 +53,25 @@ LevenshteinMatrice_base *LevenshteinMatrice_base::Builder::build(const JSonEleme
         const size_t lenB = ((const JSonContainer*) b)->size();
 
         const JSonContainer::const_iterator aBegin = ((const JSonContainer*)a)->cbegin();
-        const JSonContainer::const_iterator aEnd = ((const JSonContainer*)a)->cend();
         const JSonContainer::const_iterator bBegin = ((const JSonContainer*)b)->cbegin();
-        const JSonContainer::const_iterator bEnd = ((const JSonContainer*)b)->cend();
 
         if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
-            return new LevenshteinMatrice<unsigned char>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
+            return LevenshteinMatrice::build<unsigned char>(aBegin, bBegin, lenA, lenB);
         if (lenA < USHRT_MAX && lenB < USHRT_MAX)
-            return new LevenshteinMatrice<unsigned short>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
-        return new LevenshteinMatrice<unsigned int>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
+            return LevenshteinMatrice::build<unsigned short>(aBegin, bBegin, lenA, lenB);
+        return LevenshteinMatrice::build<unsigned int>(aBegin, bBegin, lenA, lenB);
     }
     else if (aIsContainer)
     {
         LevenshteinMatrice_manual *result = new LevenshteinMatrice_manual();
-        result->_result = 2;
+        result->_result = ((JSonContainer*)a)->size() +1; //TODO recursive number of all descendants
         return result->add(a, eLevenshteinOperator::rem)
             ->add(b, eLevenshteinOperator::add);
     }
     else if (bIsContainer)
     {
         LevenshteinMatrice_manual *result = new LevenshteinMatrice_manual();
-        result->_result = 2;
+        result->_result = ((JSonContainer*)b)->size() +1; //TODO recursive number of all descendants
         return result->add(b, eLevenshteinOperator::rem)
             ->add(a, eLevenshteinOperator::add);
     }
@@ -90,6 +88,29 @@ LevenshteinMatrice_base *LevenshteinMatrice_base::Builder::build(const JSonEleme
     }
 }
 
+eLevenshteinOperator LevenshteinMatrice_base::get(const JSonElement *e) const
+{
+    return operations.at(e);
+}
+
+/**
+ * base (generic) Matrice
+**/
+const std::map<const JSonElement*, eLevenshteinOperator> LevenshteinMatrice_base::path() const
+{ return operations; }
+
+/**
+ * Normal matrice
+**/
+LevenshteinMatrice::LevenshteinMatrice()
+{ }
+
+size_t LevenshteinMatrice::result() const
+{ return levenDist; }
+
+bool LevenshteinMatrice::areSimilar() const
+{ return levenRelativeDist > LEVENSHTEIN_SENSIBILITY; }
+
 /**
  * Manual matrice
 **/
@@ -99,11 +120,6 @@ LevenshteinMatrice_manual *LevenshteinMatrice_manual::add(const JSonElement *a,
     return this;
 }
 
-void LevenshteinMatrice_manual::debug(std::ostream &out) const
-{
-    out << "(MANUAL - no data)" << std::endl;
-}
-
 size_t LevenshteinMatrice_manual::result() const
 {
     return _result;
@@ -122,11 +138,6 @@ LevenshteinMatriceWithScore::LevenshteinMatriceWithScore(float s)
     _result = s > LEVENSHTEIN_SENSIBILITY;
 }
 
-void LevenshteinMatriceWithScore::debug(std::ostream &out) const
-{
-    out << "Comparing two raw types gave " << (_result ? "=" : "!=") << std::endl;
-}
-
 size_t LevenshteinMatriceWithScore::result() const
 {
     return _result ? 0 : 1;

+ 5 - 0
test/testDiffAdd.1.json

@@ -0,0 +1,5 @@
+[
+    1,
+    2,
+    3
+]

+ 4 - 0
test/testDiffAdd.2.json

@@ -0,0 +1,4 @@
+[
+    1,
+    3
+]

+ 5 - 0
test/testDiffMod.1.json

@@ -0,0 +1,5 @@
+[
+    1,
+    2,
+    3
+]

+ 5 - 0
test/testDiffMod.2.json

@@ -0,0 +1,5 @@
+[
+    1,
+    6,
+    3
+]

+ 4 - 0
test/testDiffRm.1.json

@@ -0,0 +1,4 @@
+[
+    1,
+    3
+]

+ 5 - 0
test/testDiffRm.2.json

@@ -0,0 +1,5 @@
+[
+    1,
+    2,
+    3
+]

+ 9 - 0
test/testDiffSubAdd.2.json

@@ -0,0 +1,9 @@
+[
+    1,
+    [
+        1,
+        2,
+        3
+    ],
+    3
+]

+ 8 - 0
test/testDiffSubAdd.json

@@ -0,0 +1,8 @@
+[
+    1,
+    [
+        1,
+        3
+    ],
+    3
+]