Bläddra i källkod

[FIX] levenshtein algo

B Thibault 9 år sedan
förälder
incheckning
ab8cdeb196
3 ändrade filer med 83 tillägg och 35 borttagningar
  1. 39 13
      include/levenshteinMatrice.hpp
  2. 5 19
      src/curseSplitOutput.cpp
  3. 39 3
      src/levenshtein.cpp

+ 39 - 13
include/levenshteinMatrice.hpp

@@ -20,6 +20,7 @@ class LevenshteinMatrice_base
 
         const std::map<const JSonElement*, eLevenshteinOperator> path() const;
         virtual size_t result() const =0;
+        virtual bool areSimilar() const =0;
 
         virtual void debug(std::ostream &out) const =0;
 
@@ -30,7 +31,7 @@ class LevenshteinMatrice_base
                 Builder();
                 ~Builder();
 
-                const LevenshteinMatrice_base *build(const JSonElement *a, const JSonElement *b) const;
+                LevenshteinMatrice_base *build(const JSonElement *a, const JSonElement *b) const;
         };
 
     protected:
@@ -42,6 +43,7 @@ class LevenshteinMatrice_manual: public LevenshteinMatrice_base
     public:
         LevenshteinMatrice_manual *add(const JSonElement*, eLevenshteinOperator);
         size_t result() const;
+        bool areSimilar() const;
 
         void debug(std::ostream &out) const;
 
@@ -49,6 +51,19 @@ class LevenshteinMatrice_manual: public LevenshteinMatrice_base
         size_t _result;
 };
 
+class LevenshteinMatriceWithScore: public LevenshteinMatrice_base // comparison outcome of one pair of elements, reduced to a yes/no verdict
+{
+    public:
+        LevenshteinMatriceWithScore(float score); // keeps only whether score is above LEVENSHTEIN_SENSIBILITY
+
+        size_t result() const; // 0 when the pair was judged similar, 1 otherwise
+        void debug(std::ostream &out) const; // prints "=" or "!=" according to the stored verdict
+        bool areSimilar() const; // returns the stored verdict
+
+    private:
+        bool _result; // true when the score passed the threshold; the raw score itself is not kept
+};
+
 template<typename T>
 class LevenshteinMatrice: public LevenshteinMatrice_base
 {
@@ -60,6 +75,7 @@ class LevenshteinMatrice: public LevenshteinMatrice_base
             size_t i, j;
             JSonContainer::const_iterator a = aBegin;
             JSonContainer::const_iterator b;
+            LevenshteinMatrice_base::Builder matriceBuilder;
 
             this->n = n;
             this->m = m;
@@ -83,21 +99,25 @@ class LevenshteinMatrice: public LevenshteinMatrice_base
                     subMatrice[i][j] = nullptr;
             }
 
-            for (i =1; a != aEnd; ++i, ++a)
+            for (i =0; a != aEnd; ++i, ++a)
             {
                 b = bBegin;
-                for (j =1; b != bEnd; ++j, ++b)
+                for (j =0; b != bEnd; ++j, ++b)
                 {
-                    //TODO compute submatrice
-                    /*
-                    matrice[i][j] = std::min(std::min(
-                        get(i -1, j) +1,
-                        get(i, j -1) +1),
-                        get(i -1, j -1) + ((levenshteinCompare(*a, *b) > LEVENSHTEIN_SENSIBILITY) ? 0 : 1)); // TODO set submatrice
-                    */
-                    matrice[i][j] = std::min(
-                        get(i -1, j) +1,
-                        get(i, j -1) +1);
+                    LevenshteinMatrice_base *subMatrice = matriceBuilder.build(*a, *b);
+                    if (subMatrice != nullptr)
+                    {
+                        const T chCost = get(i, j) + (subMatrice->areSimilar() ? 0 : 1);
+
+                        if (chCost <= get(i, j +1) +1 && chCost <= get(i +1, j))
+                        {
+                            matrice[i +1][j +1] = chCost;
+                            this->subMatrice[i +1][j +1] = subMatrice;
+                            continue;
+                        }
+                        delete subMatrice;
+                    } // Change is not worth or subMatrice is null (eg. a and b has different types)
+                    matrice[i +1][j +1] = std::min(get(i, j +1), get(i +1, j)) +1;
                 }
             }
         };
@@ -180,6 +200,12 @@ class LevenshteinMatrice: public LevenshteinMatrice_base
             return (size_t) matrice[n][m];
         };
 
+        bool areSimilar() const // true when the edit distance, relative to the longer side, stays under the tolerated share
+        {
+            const float longest = static_cast<float>(std::max(n, m)); // float on purpose: an integer ratio would truncate to 0 or 1
+            return longest == 0.f || (1.f - (static_cast<float>(result()) / longest)) > LEVENSHTEIN_SENSIBILITY; // two empty containers are identical, and must not divide by zero
+        }
+
     private:
         T **matrice;
         /**

+ 5 - 19
src/curseSplitOutput.cpp

@@ -74,28 +74,14 @@ void CurseSplitOutput::loop()
     }
 }
 
-// FIXME Will fail if 3 inputs
 void CurseSplitOutput::computeDiff()
 {
-    const JSonContainer *a = dynamic_cast<const JSonContainer*>(roots.at(0));
-    const JSonContainer *b = dynamic_cast<const JSonContainer*>(roots.at(1));
-
-    if (!a && !b)
-    {
-        //TODO diff primitives
-    }
-    else if (!a)
-    {
-    }
-    else if (!b)
-    {
-    }
-    else
-    {
-        LevenshteinMatrice_base::Builder builder;
+    LevenshteinMatrice_base::Builder builder; // picks the matrice implementation from the kind of the two roots
+    if (roots.size() == 2) // two inputs: diff the first root against the second
         diffMatrice = builder.build(roots.at(0), roots.at(1));
-        diffMatrice->debug(std::cout);
-    }
+    else if (roots.size() == 3) // three-way diff is rejected explicitly rather than silently skipped
+        throw std::runtime_error("3-input diff not implemented"); //TODO
+    if (diffMatrice != nullptr) diffMatrice->debug(std::cout); // build() may return nullptr (callers in levenshteinMatrice.hpp check for it); NOTE(review): for other input counts diffMatrice keeps its previous value - confirm it starts as nullptr
 }
 
 inputResult CurseSplitOutput::selectUp()

+ 39 - 3
src/levenshtein.cpp

@@ -1,5 +1,6 @@
 #include <climits>
 #include "levenshteinMatrice.hpp"
+#include "jsonObjectEntry.hh"
 
 size_t levenshtein(const std::string &a, const std::string &b)
 {
@@ -15,7 +16,7 @@ size_t levenshtein(const std::string &a, const std::string &b)
             matrice[i][j] = std::min(std::min(
                     matrice[i -1][j] +1,
                     matrice[i][j -1] +1),
-                    matrice[i -1][j -1] + (a[i] == b[j] ? 0 : 1));
+                    matrice[i -1][j -1] + (a[i -1] == b[j -1] ? 0 : 1));
     }
 
     const size_t result = matrice[a.size()][b.size()];
@@ -41,7 +42,7 @@ LevenshteinMatrice_base::Builder::Builder()
 LevenshteinMatrice_base::Builder::~Builder()
 { }
 
-const LevenshteinMatrice_base *LevenshteinMatrice_base::Builder::build(const JSonElement *a, const JSonElement *b) const
+LevenshteinMatrice_base *LevenshteinMatrice_base::Builder::build(const JSonElement *a, const JSonElement *b) const
 {
     const bool aIsContainer = ((dynamic_cast<const JSonContainer*>(a)) != nullptr);
     const bool bIsContainer = ((dynamic_cast<const JSonContainer*>(b)) != nullptr);
@@ -78,7 +79,14 @@ const LevenshteinMatrice_base *LevenshteinMatrice_base::Builder::build(const JSo
     }
     else
     {
-        // TODO a and b are both (primitive or objectEntries)
+        const bool aIsObject = ((dynamic_cast<const JSonObjectEntry*>(a)) != nullptr);
+        const bool bIsObject = ((dynamic_cast<const JSonObjectEntry*>(b)) != nullptr);
+        float result = levenshteinPercent(a->stringify(), b->stringify());
+
+        if (aIsObject && bIsObject) {
+            result *= levenshteinPercent((*(const JSonObjectEntry&)(*a))->stringify(), (*(const JSonObjectEntry&)(*b))->stringify());
+        }
+        return new LevenshteinMatriceWithScore(result);
     }
 }
 
@@ -101,3 +109,31 @@ size_t LevenshteinMatrice_manual::result() const
     return _result;
 }
 
+bool LevenshteinMatrice_manual::areSimilar() const // a manually assembled matrice always answers "not similar"
+{
+    return false;
+}
+
+/**
+ * Score matrice: keeps only the similar / not-similar verdict derived from a score
+**/
+LevenshteinMatriceWithScore::LevenshteinMatriceWithScore(float s) // s: score computed by the caller for the compared pair
+{
+    _result = s > LEVENSHTEIN_SENSIBILITY; // strictly above the threshold counts as similar
+}
+
+void LevenshteinMatriceWithScore::debug(std::ostream &out) const // writes a one-line summary of the verdict to out
+{
+    out << "Comparing two raw types gave " << (_result ? "=" : "!=") << std::endl; // "=" for similar, "!=" otherwise; std::endl also flushes out
+}
+
+size_t LevenshteinMatriceWithScore::result() const // numeric form of the verdict
+{
+    return _result ? 0 : 1; // 0 when the pair was judged similar, 1 otherwise
+}
+
+bool LevenshteinMatriceWithScore::areSimilar() const // verdict fixed at construction time
+{
+    return _result;
+}
+