Bladeren bron

[TMP BORKEN]
[refactor] removed LOT of dead code
[refactor] moved diff out of JSonElement

B Thibault 9 jaren geleden
bovenliggende
commit
c733c20cbb

+ 5 - 0
CMakeLists.txt

@@ -84,6 +84,11 @@ set_property(
 
 add_executable(levenshtein_test
     src/levenshtein.cpp
+    src/jsonContainer.cpp
+    src/jsonElement.cpp
+    src/jsonPrimitive.cpp
+    src/jsonObjectEntry.cpp
+    src/searchPattern.cpp
 
     test/src/levenshtein_test.cpp
     )

+ 3 - 1
include/curseSplitOutput.hh

@@ -4,6 +4,8 @@
 #include "curseOutput.hh"
 #include "levenshtein.hpp"
 
+class LevenshteinMatrice_base;
+
 class CurseSplitOutput: public CurseOutput
 {
     public:
@@ -80,7 +82,7 @@ class CurseSplitOutput: public CurseOutput
          * currently searching pattern and its results
         **/
         std::deque<std::list<const JSonElement*> > search_result;
-        LevenshteinMatrice_base *diffMatrice;
+        const LevenshteinMatrice_base *diffMatrice;
 
         /**
          * Viewport start

+ 0 - 1
include/jsonContainer.hh

@@ -15,7 +15,6 @@ class JSonContainer: public JSonElement, public std::list<JSonElement*>
         JSonContainer(JSonContainer *parent);
         virtual ~JSonContainer();
 
-        virtual float diff(const JSonElement *) const;
         virtual bool operator==(const JSonElement *) const;
 
         /**

+ 0 - 2
include/jsonElement.hh

@@ -22,8 +22,6 @@ class JSonElement
         **/
         virtual std::string stringify() const =0;
 
-        virtual float diff(const JSonElement *) const;
-
         /**
          * get the number of col string will output
         **/

+ 4 - 80
include/levenshtein.hpp

@@ -1,87 +1,11 @@
 #pragma once
 
 #include <string>
-#include <list>
-#include <limits.h>
-#include "jsonElement.hh"
-#include "levenshteinMatrice.hpp"
 
-#define LEVENSHTEIN_SENSIBILITY (0.7f)
+#ifndef  LEVENSHTEIN_SENSIBILITY
+# define LEVENSHTEIN_SENSIBILITY (0.7f)
+#endif //LEVENSHTEIN_SENSIBILITY
 
+size_t levenshtein(const std::string &a, const std::string &b);
 float levenshteinPercent(const std::string &a, const std::string &b);
-template<class T> float levenshteinPercent(const std::list<T *> *a, const std::list<T *> *b);
-bool levenshteinStrictCompare(const char &a, const char &b);
-bool levenshteinStrictCompare(const JSonElement *a, const JSonElement *b);
-bool levenshteinCompare(const char &a, const char &b);
-bool levenshteinCompare(const JSonElement *a, const JSonElement *b);
-
-template<typename SIZE, class ITERATOR, class SUBTYPE>
-static LevenshteinMatrice<SIZE> *_levenshteinMatrice(const ITERATOR &aBegin, const ITERATOR &aEnd, const ITERATOR &bBegin, const ITERATOR &bEnd, const size_t lenA, const size_t lenB)
-{
-    size_t i, j;
-    LevenshteinMatrice<SIZE> *matrice = new LevenshteinMatrice<SIZE>(lenA, lenB);
-    ITERATOR a = aBegin;
-    ITERATOR b;
-
-    for (i =1; a != aEnd; ++i, ++a)
-    {
-        b = bBegin;
-        for (j =1; b != bEnd; ++j, ++b)
-            matrice->set(i, j, std::min(std::min(
-                    matrice->get(i -1, j) +1,
-                    matrice->get(i, j -1) +1),
-                    matrice->get(i -1, j -1) + ((levenshteinCompare(*a, *b) > LEVENSHTEIN_SENSIBILITY) ? 0 : 1)));
-    }
-    return matrice;
-};
-
-template<typename SIZE, typename ITERATOR, class SUBTYPE>
-static float _levenshteinPercent(ITERATOR aBegin, ITERATOR aEnd, ITERATOR bBegin, ITERATOR bEnd, size_t lenA, size_t lenB)
-{
-    const size_t maxSize = std::max(lenA, lenB);
-    while (aBegin != aEnd && bBegin != bEnd && levenshteinCompare(*aBegin, *bBegin))
-    {
-        aBegin++;
-        bBegin++;
-        lenA--;
-        lenB--;
-    }
-    if (!lenA && !lenB) return 1.f;
-    if (!lenA) return (float) lenB / maxSize;
-    if (!lenB) return (float) lenA / maxSize;
-    LevenshteinMatrice<SIZE> *matrice = _levenshteinMatrice<SIZE, ITERATOR, SUBTYPE>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
-    const SIZE result = matrice->result();
-
-    delete matrice;
-    return 1 - ((float)result / maxSize);
-};
-
-template<class T> float levenshteinPercent(const std::list<T *> *a, const std::list<T *> *b)
-{
-    const size_t lenA = a->size();
-    const size_t lenB = b->size();
-    typename std::list<T*>::const_iterator aBegin = a->cbegin();
-    typename std::list<T*>::const_iterator aEnd = a->cend();
-    typename std::list<T*>::const_iterator bBegin = b->cbegin();
-    typename std::list<T*>::const_iterator bEnd = b->cend();
-
-    if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
-        return _levenshteinPercent<unsigned char, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
-    if (lenA < USHRT_MAX && lenB < USHRT_MAX)
-        return _levenshteinPercent<unsigned short, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
-    return _levenshteinPercent<unsigned int, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
-}
-
-template<class T>
-LevenshteinMatrice_base *levenshteinShortestPath(const std::list<T*> *a, const std::list<T *> *b)
-{
-    const size_t lenA = a->size();
-    const size_t lenB = b->size();
-
-    if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
-        return _levenshteinMatrice<unsigned char, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
-    if (lenA < USHRT_MAX && lenB < USHRT_MAX)
-        return _levenshteinMatrice<unsigned short, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
-    return _levenshteinMatrice<unsigned int, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
-}
 

+ 76 - 13
include/levenshteinMatrice.hpp

@@ -1,5 +1,8 @@
 #pragma once
 
+#include <iostream>
+#include <map>
+#include "jsonContainer.hh"
 #include "levenshtein.hpp"
 
 enum eLevenshteinOperator: char
@@ -14,36 +17,89 @@ class LevenshteinMatrice_base
 {
     public:
         virtual ~LevenshteinMatrice_base() {}
-        virtual void prune() =0;
+
+        const std::map<const JSonElement*, eLevenshteinOperator> path() const;
+        virtual size_t result() const =0;
+
+        virtual void debug(std::ostream &out) const =0;
+
+    public:
+        class Builder
+        {
+            public:
+                Builder();
+                ~Builder();
+
+                const LevenshteinMatrice_base *build(const JSonElement *a, const JSonElement *b) const;
+        };
+
+    protected:
+        std::map<const JSonElement*, eLevenshteinOperator> operations;
+};
+
+/**
+ * Matrice whose operations and cost are filled by hand instead of being
+ * computed cell by cell
+**/
+class LevenshteinMatrice_manual: public LevenshteinMatrice_base
+{
+    public:
+        /**
+         * record an operation for the given element
+         * @return this, so calls can be chained
+        **/
+        LevenshteinMatrice_manual *add(const JSonElement*, eLevenshteinOperator);
+        /**
+         * @return the cost stored in _result
+        **/
+        size_t result() const override;
+
+        /**
+         * write a placeholder line to out (there is no cell to dump)
+        **/
+        void debug(std::ostream &out) const override;
+
+    public:
+        /**
+         * cost reported by result()
+         * starts at 0 so result() never reads an indeterminate value
+        **/
+        size_t _result = 0;
 };
 
 template<typename T>
 class LevenshteinMatrice: public LevenshteinMatrice_base
 {
     public:
-        LevenshteinMatrice(size_t n, size_t m)
+        LevenshteinMatrice(const JSonContainer::const_iterator aBegin, const JSonContainer::const_iterator aEnd,
+                const JSonContainer::const_iterator bBegin, const JSonContainer::const_iterator bEnd,
+                size_t n, size_t m)
         {
+            size_t i, j;
+            JSonContainer::const_iterator a = aBegin;
+            JSonContainer::const_iterator b;
+
             this->n = n;
             this->m = m;
             this->matrice = new T*[n +1]();
             this->subMatrice = new LevenshteinMatrice_base**[n +1]();
 
             matrice[0] = new T[m +1];
-            for (size_t i =1; i <= m; ++i)
+            for (i =1; i <= m; ++i)
                 matrice[0][i] = i;
 
-            for (size_t i=1; i <= n; ++i)
+            for (i=1; i <= n; ++i)
             {
                 matrice[i] = new T[m +1];
                 matrice[i][0] = i;
             }
 
-            for (size_t i=0; i <= n; ++i)
+            for (i=0; i <= n; ++i)
             {
                 subMatrice[i] = new LevenshteinMatrice_base*[m +1];
                 for (size_t j=0; j <= m; ++j)
                     subMatrice[i][j] = nullptr;
             }
+
+            for (i =1; a != aEnd; ++i, ++a)
+            {
+                b = bBegin;
+                for (j =1; b != bEnd; ++j, ++b)
+                {
+                    //TODO compute submatrice
+                    /*
+                    matrice[i][j] = std::min(std::min(
+                        get(i -1, j) +1,
+                        get(i, j -1) +1),
+                        get(i -1, j -1) + ((levenshteinCompare(*a, *b) > LEVENSHTEIN_SENSIBILITY) ? 0 : 1)); // TODO set submatrice
+                    */
+                    matrice[i][j] = std::min(
+                        get(i -1, j) +1,
+                        get(i, j -1) +1);
+                }
+            }
         };
 
         ~LevenshteinMatrice()
@@ -70,12 +126,6 @@ class LevenshteinMatrice: public LevenshteinMatrice_base
             return matrice[a][b];
         };
 
-        void set(size_t a, size_t b, T value, LevenshteinMatrice_base *subMatrice =nullptr)
-        {
-            matrice[a][b] = value;
-            this->subMatrice[a][b] = subMatrice;
-        };
-
         std::list<eLevenshteinOperator> shortestPath() const
         {
             std::list<eLevenshteinOperator> result;
@@ -115,13 +165,26 @@ class LevenshteinMatrice: public LevenshteinMatrice_base
             return result;
         }
 
-        T result() const
+        /**
+         * Dump the (n +1) x (m +1) cells of the matrice on o,
+         * one row per line, cells separated by tabs
+        **/
+        void debug(std::ostream &o) const
+        {
+            for (size_t i =0; i <= n; ++i)
+            {
+                for (size_t j=0; j <= m; ++j)
+                    o << (size_t) (matrice[i][j]) << '\t'; // current cell, not the bottom-right one
+                o << std::endl;
+            }
+        }
+
+        size_t result() const
         {
-            return matrice[n][m];
+            return (size_t) matrice[n][m];
         };
 
     private:
         T **matrice;
+        /**
+         * Useful only for the `modify' operation
+        **/
         LevenshteinMatrice_base ***subMatrice;
 
         size_t n;

+ 4 - 1
src/curseSplitOutput.cpp

@@ -14,6 +14,7 @@
 #include "jsonObject.hh"
 #include "jsonArray.hh"
 #include "jsonPrimitive.hh"
+#include "levenshteinMatrice.hpp"
 
 CurseSplitOutput::CurseSplitOutput(const Params &p): CurseOutput(p)
 {
@@ -91,7 +92,9 @@ void CurseSplitOutput::computeDiff()
     }
     else
     {
-        diffMatrice = levenshteinShortestPath<JSonElement>(a, b);
+        LevenshteinMatrice_base::Builder builder;
+        diffMatrice = builder.build(roots.at(0), roots.at(1));
+        diffMatrice->debug(std::cout);
     }
 }
 

+ 0 - 8
src/jsonContainer.cpp

@@ -5,7 +5,6 @@
 **/
 
 #include "jsonContainer.hh"
-#include "levenshtein.hpp"
 
 JSonContainer::JSonContainer(JSonContainer *p):JSonElement(p)
 { }
@@ -13,13 +12,6 @@ JSonContainer::JSonContainer(JSonContainer *p):JSonElement(p)
 JSonContainer::~JSonContainer()
 { }
 
-float JSonContainer::diff(const JSonElement *other) const
-{
-    if (!dynamic_cast<const JSonContainer *> (other))
-        return 0.f;
-    return levenshteinPercent<JSonElement>(this, (const JSonContainer*)other);
-}
-
 bool JSonContainer::operator==(const JSonElement *other) const
 {
     if (!dynamic_cast<const JSonContainer *> (other) || size() != ((const JSonContainer*)other)->size())

+ 0 - 8
src/jsonElement.cpp

@@ -108,14 +108,6 @@ bool JSonElement::match(const SearchPattern &searchPattern) const
     return searchPattern.match(stringify(), this);
 }
 
-float JSonElement::diff(const JSonElement *o) const
-{
-    if (dynamic_cast<const JSonContainer*>(o) ||
-            dynamic_cast<const JSonObjectEntry*>(o))
-        return 0.f;
-    return levenshteinPercent(stringify(), o->stringify());
-}
-
 bool JSonElement::operator==(const JSonElement *o) const
 {
     return stringify() == o->stringify();

+ 86 - 18
src/levenshtein.cpp

@@ -1,35 +1,103 @@
-#include "levenshtein.hpp"
+#include <climits>
+#include "levenshteinMatrice.hpp"
+
+/**
+ * Compute the Levenshtein (edit) distance between two strings
+ *
+ * @param a first string
+ * @param b second string
+ * @return minimal number of single-character insertions, deletions and
+ *         substitutions needed to turn `a' into `b'
+**/
+size_t levenshtein(const std::string &a, const std::string &b)
+{
+    const size_t lenA = a.size();
+    const size_t lenB = b.size();
+    size_t **matrice = new size_t*[lenA +1]();
+
+    matrice[0] = new size_t[lenB +1]();
+    for (size_t j=0; j <= lenB; ++j)
+        matrice[0][j] = j;
+    for (size_t i=1; i <= lenA; ++i)
+    {
+        matrice[i] = new size_t[lenB +1]();
+        matrice[i][0] = i;
+        for (size_t j=1; j <= lenB; ++j)
+            // row i / column j stand for the i-th / j-th characters,
+            // stored at 0-based indexes i -1 and j -1
+            matrice[i][j] = std::min(std::min(
+                    matrice[i -1][j] +1,
+                    matrice[i][j -1] +1),
+                    matrice[i -1][j -1] + (a[i -1] == b[j -1] ? 0 : 1));
+    }
+
+    const size_t result = matrice[lenA][lenB];
+    for (size_t i=0; i <= lenA; ++i)
+        delete []matrice[i];
+    delete[] matrice;
+    return result;
+}
 
 float levenshteinPercent(const std::string &a, const std::string &b)
 {
-    const size_t lenA = a.size();
-    const size_t lenB = b.size();
-
-    if (a == b) return 1.f;
-    if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
-        return _levenshteinPercent<unsigned char, std::string::const_iterator, char>(a.begin(), a.end(), b.begin(), b.end(), lenA, lenB);
-    if (lenA < USHRT_MAX && lenB < USHRT_MAX)
-        return _levenshteinPercent<unsigned short, std::string::const_iterator, char>(a.begin(), a.end(), b.begin(), b.end(), lenA, lenB);
-    return _levenshteinPercent<unsigned int, std::string::const_iterator, char>(a.begin(), a.end(), b.begin(), b.end(), lenA, lenB);
+    if (a.empty() && b.empty())
+        return 1.f;
+    return 1 - (levenshtein(a, b) / std::max(a.size(), b.size()));
 }
 
-bool levenshteinCompare(const char &a, const char &b)
+/**
+ * Levenshtein Matrice Builder stuff
+**/
+LevenshteinMatrice_base::Builder::Builder()
+{ }
+
+LevenshteinMatrice_base::Builder::~Builder()
+{ }
+
+/**
+ * Build the edit matrice describing the differences between `a' and `b'
+ *
+ * - both are containers: a LevenshteinMatrice over their children, using
+ *   the smallest cell type whose range covers both sizes
+ * - only one is a container: a manual matrice of cost 2
+ *   (one removal + one addition)
+ * - none is a container: a manual matrice of cost 0 when both elements
+ *   compare equal, of cost 2 (one removal + one addition) otherwise
+ *
+ * @return a matrice allocated with new, never nullptr
+**/
+const LevenshteinMatrice_base *LevenshteinMatrice_base::Builder::build(const JSonElement *a, const JSonElement *b) const
 {
-    return a == b;
+    const JSonContainer *containerA = dynamic_cast<const JSonContainer*>(a);
+    const JSonContainer *containerB = dynamic_cast<const JSonContainer*>(b);
+
+    if (containerA && containerB)
+    {
+        const size_t lenA = containerA->size();
+        const size_t lenB = containerB->size();
+
+        const JSonContainer::const_iterator aBegin = containerA->cbegin();
+        const JSonContainer::const_iterator aEnd = containerA->cend();
+        const JSonContainer::const_iterator bBegin = containerB->cbegin();
+        const JSonContainer::const_iterator bEnd = containerB->cend();
+
+        if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
+            return new LevenshteinMatrice<unsigned char>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
+        if (lenA < USHRT_MAX && lenB < USHRT_MAX)
+            return new LevenshteinMatrice<unsigned short>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
+        return new LevenshteinMatrice<unsigned int>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
+    }
+    else if (containerA)
+    {
+        LevenshteinMatrice_manual *result = new LevenshteinMatrice_manual();
+        result->_result = 2;
+        return result->add(a, eLevenshteinOperator::rem)
+            ->add(b, eLevenshteinOperator::add);
+    }
+    else if (containerB)
+    {
+        // NOTE(review): rem/add are swapped compared to the branch above
+        // (b removed, a added) -- confirm this is intended
+        LevenshteinMatrice_manual *result = new LevenshteinMatrice_manual();
+        result->_result = 2;
+        return result->add(b, eLevenshteinOperator::rem)
+            ->add(a, eLevenshteinOperator::add);
+    }
+    else
+    {
+        // TODO a and b are both (primitive or objectEntries): finer comparison
+        // Until then always return a matrice: flowing off the end of a
+        // non-void function is undefined behavior
+        LevenshteinMatrice_manual *result = new LevenshteinMatrice_manual();
+
+        if (a == b || (a && b && *a == b))
+        {
+            result->_result = 0;
+            return result;
+        }
+        result->_result = 2;
+        return result->add(a, eLevenshteinOperator::rem)
+            ->add(b, eLevenshteinOperator::add);
+    }
 }
 
-bool levenshteinCompare(const JSonElement *a, const JSonElement *b)
+/**
+ * Manual matrice
+**/
+LevenshteinMatrice_manual *LevenshteinMatrice_manual::add(const JSonElement *a, eLevenshteinOperator b)
 {
-    return a->diff(b) > LEVENSHTEIN_SENSIBILITY;
+    operations[a] = b;
+    return this;
 }
 
-bool levenshteinStrictCompare(const char &a, const char &b)
+void LevenshteinMatrice_manual::debug(std::ostream &out) const
 {
-    return a == b;
+    out << "(MANUAL - no data)" << std::endl;
 }
 
-bool levenshteinStrictCompare(const JSonElement *a, const JSonElement *b)
+size_t LevenshteinMatrice_manual::result() const
 {
-    return *a == b;
+    return _result;
 }