Sfoglia il codice sorgente

[refactor] optimization: skip the leading N identical items

isundil 9 anni fa
parent
commit
7381bf47eb

+ 1 - 0
include/jsonContainer.hh

@@ -16,6 +16,7 @@ class JSonContainer: public JSonElement, public std::list<JSonElement*>
         virtual ~JSonContainer();
 
         virtual float diff(const JSonElement *) const;
+        virtual bool operator==(const JSonElement *) const;
 
         /**
          * Get the first item of this container

+ 1 - 0
include/jsonElement.hh

@@ -57,6 +57,7 @@ class JSonElement
          * check if this element match SearchQuery
         **/
         virtual bool match(const SearchPattern &) const;
+        virtual bool operator==(const JSonElement *other) const;
 
     private:
         JSonElement();

+ 1 - 0
include/jsonObjectEntry.hh

@@ -19,6 +19,7 @@ class JSonObjectEntry: public JSonElement
         std::string stringify() const;
 
         bool operator==(const std::string &) const;
+        bool operator==(const JSonElement *) const;
         bool operator<(const JSonElement *) const;
         bool operator<(const JSonElement &) const;
         /**

+ 57 - 30
include/levenshtein.hh

@@ -7,71 +7,86 @@
 
 #include <iostream>
 
+#define LEVENSHTEIN_SENSIBILITY (0.7f)
+
 float levenshteinPercent(const std::string &a, const std::string &b);
 template<class T> float levenshteinPercent(const std::list<T *> *a, const std::list<T *> *b);
+bool levenshteinStrictCompare(const char &a, const char &b);
+bool levenshteinStrictCompare(const JSonElement *a, const JSonElement *b);
 bool levenshteinCompare(const char &a, const char &b);
 bool levenshteinCompare(const JSonElement *a, const JSonElement *b);
 
-template<typename SIZE, class T, class SUBTYPE>
-static SIZE **_levenshteinMatrice(const T *a, const T *b, const size_t lenA, const size_t lenB)
+/**
+ * Build the (lenA + 1) x (lenB + 1) edit-distance matrix for the ranges
+ * [aBegin, aEnd) and [bBegin, bEnd).
+ * The row table and every row are allocated with new[] and returned as is;
+ * the caller has to delete[] them.
+ **/
+template<typename SIZE, class ITERATOR, class SUBTYPE>
+static SIZE **_levenshteinMatrice(const ITERATOR &aBegin, const ITERATOR &aEnd, const ITERATOR &bBegin, const ITERATOR &bEnd, const size_t lenA, const size_t lenB)
 {
     size_t i, j;
     SIZE **matrice = new SIZE*[lenA +1]();
+    ITERATOR a = aBegin;
+    ITERATOR b;
 
+    // First row: cell j holds j (distance from nothing to the first j items of B).
     matrice[0] = new SIZE[lenB +1]();
     for (j=0; j <= lenB; j++)
         matrice[0][j] = j;
-    i = 1;
-    for (SUBTYPE it: *a)
+    for (i =1; a != aEnd; ++i, ++a)
     {
-        j =1;
         matrice[i] = new SIZE[lenB +1]();
         matrice[i][0] = i;
-        for (SUBTYPE jt: *b)
-        {
+        b = bBegin;
+        // Each cell is the cheapest of: cell above +1, cell on the left +1, or the
+        // diagonal cell plus 0 when the two items match and 1 otherwise.
+        // levenshteinCompare() is declared as returning bool, so the
+        // "> LEVENSHTEIN_SENSIBILITY" test is true exactly when it returned true.
+        for (j =1; b != bEnd; ++j, ++b)
             matrice[i][j] = std::min(std::min(
                     matrice[i -1][j] +1,
                     matrice[i][j -1] +1),
-                    matrice[i -1][j -1] + ((levenshteinCompare(it, jt) > .9f) ? 0 : 1));
-            j++;
-        }
-        i++;
+                    matrice[i -1][j -1] + ((levenshteinCompare(*a, *b) > LEVENSHTEIN_SENSIBILITY) ? 0 : 1));
     }
 
+    // Debug dump of the whole matrix to stderr; this runs on every call.
+    std::cerr << "<------" << std::endl;
     for (size_t i=0; i <= lenA; ++i)
     {
         for (size_t j=0; j <= lenB; ++j)
             std::cerr << (size_t) matrice[i][j] << "\t";
-        std::cerr << std::endl << "[";
+        std::cerr << std::endl;
     }
     return matrice;
 };
 
-template<typename SIZE, class T, class SUBTYPE>
-static SIZE _levenshteinPercent(const T *a, const T *b, const size_t lenA, const size_t lenB)
+/**
+ * Similarity of two sequences, in [0, 1]: 1 - (edit distance / longest length).
+ * lenA and lenB must be the number of items in [aBegin, aEnd) and [bBegin, bEnd).
+ * Returns 1 when both sequences are strictly identical (including both empty).
+ **/
+template<typename SIZE, typename ITERATOR, class SUBTYPE>
+static float _levenshteinPercent(ITERATOR aBegin, ITERATOR aEnd, ITERATOR bBegin, ITERATOR bEnd, size_t lenA, size_t lenB)
 {
-    if (lenA == 0) return lenB;
-    if (lenB == 0) return lenA;
-    SIZE **matrice = _levenshteinMatrice<SIZE, T, SUBTYPE>(a, b, lenA, lenB);
+    // Captured before the prefix is trimmed: the score is relative to the full inputs.
+    const size_t maxSize = std::max(lenA, lenB);
+    // Strictly identical leading items add nothing to the distance; skip them.
+    while (aBegin != aEnd && bBegin != bEnd && levenshteinStrictCompare(*aBegin, *bBegin))
+    {
+        ++aBegin;
+        ++bBegin;
+        --lenA;
+        --lenB;
+    }
+    if (!lenA && !lenB) return 1.f;
+    // One side is exhausted: the distance is whatever remains on the other side,
+    // so the similarity is 1 minus that ratio (not the ratio itself).
+    if (!lenA) return 1 - ((float) lenB / maxSize);
+    if (!lenB) return 1 - ((float) lenA / maxSize);
+    SIZE **matrice = _levenshteinMatrice<SIZE, ITERATOR, SUBTYPE>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
     size_t i;
     const SIZE result = matrice[lenA][lenB];
 
-    for (i=0; i < lenA; ++i)
+    // The matrix has lenA + 1 rows (0..lenA); release every one of them.
+    for (i=0; i <= lenA; ++i)
         delete[] matrice[i];
     delete[] matrice;
-    return 1 - (result / std::max(lenA, lenB));
+    return 1 - ((float)result / maxSize);
 };
 
+/**
+ * Similarity of two lists of T*, as computed by _levenshteinPercent.
+ * Both a and b are dereferenced unconditionally, so neither may be null.
+ **/
 template<class T> float levenshteinPercent(const std::list<T *> *a, const std::list<T *> *b)
 {
     const size_t lenA = a->size();
     const size_t lenB = b->size();
+    typename std::list<T*>::const_iterator aBegin = a->cbegin();
+    typename std::list<T*>::const_iterator aEnd = a->cend();
+    typename std::list<T*>::const_iterator bBegin = b->cbegin();
+    typename std::list<T*>::const_iterator bEnd = b->cend();
 
+    // Use the narrowest matrix cell type whose range covers both lengths.
     if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
-        return _levenshteinPercent<unsigned char, std::list<T *>, T *>(a, b, lenA, lenB);
+        return _levenshteinPercent<unsigned char, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
     if (lenA < USHRT_MAX && lenB < USHRT_MAX)
-        return _levenshteinPercent<unsigned short, std::list<T *>, T *>(a, b, lenA, lenB);
-    return _levenshteinPercent<unsigned int, std::list<T *>, T *>(a, b, lenA, lenB);
+        return _levenshteinPercent<unsigned short, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
+    return _levenshteinPercent<unsigned int, typename std::list<T *>::const_iterator, T *>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
 }
 
 enum ePath: char
@@ -82,18 +97,30 @@ enum ePath: char
     equ = '='
 };
 
-template<typename SIZE, class T, class SUBTYPE>
-static std::list<ePath> _levenshteinShortestPath(const T *a, const T *b, const size_t lenA, const size_t lenB)
+template<typename SIZE, class ITERATOR, class SUBTYPE>
+static std::list<ePath> _levenshteinShortestPath(ITERATOR aBegin, ITERATOR aEnd, ITERATOR bBegin, ITERATOR bEnd, size_t lenA, size_t lenB)
 {
     std::list<ePath> result(std::max(lenA, lenB));
 
-    if (lenA == 0 || lenB == 0)
-    //TODO create deque<ePath>(std::max(lenA, lenB) populated with '-'
-        ;
-    SIZE **matrice = _levenshteinMatrice<SIZE, T, SUBTYPE>(a, b, lenA, lenB);
+    while (aBegin != aEnd && bBegin != bEnd && levenshteinStrictCompare(*aBegin, *bBegin))
+    {
+        aBegin++;
+        bBegin++;
+        lenA--;
+        lenB--;
+    }
+    if (!lenA && !lenB)
+        ; //TODO create deque<ePath>(std::max(lenA, lenB) populated with '='
+    else if (!lenA)
+        ; //TODO create deque<ePath>(std::max(lenA, lenB) populated with '=' then '-' and return it
+    else if (!lenB)
+        ; //TODO create deque<ePath>(std::max(lenA, lenB) populated with '=' then '+' and return it
+    SIZE **matrice = _levenshteinMatrice<SIZE, ITERATOR, SUBTYPE>(aBegin, aEnd, bBegin, bEnd, lenA, lenB);
     size_t i;
 
     //TODO find shortest path
+    // - goto bottom right
+    // - go back to top left going decrement ONLY (or =, if not possible)
 
 
     // Clean matrice
@@ -110,9 +137,9 @@ std::list<ePath> levenshteinShortestPath(const std::list<T*> *a, const std::list
     const size_t lenB = b->size();
 
     if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
-        return _levenshteinShortestPath<unsigned char, std::list<T *>, T *>(a, b, lenA, lenB);
+        return _levenshteinShortestPath<unsigned char, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
     if (lenA < USHRT_MAX && lenB < USHRT_MAX)
-        return _levenshteinShortestPath<unsigned short, std::list<T *>, T *>(a, b, lenA, lenB);
-    return _levenshteinShortestPath<unsigned int, std::list<T *>, T *>(a, b, lenA, lenB);
+        return _levenshteinShortestPath<unsigned short, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
+    return _levenshteinShortestPath<unsigned int, typename std::list<T *>::const_iterator, T *>(a->cbegin(), a->cend(), b->cbegin(), b->cend(), lenA, lenB);
 }
 

+ 16 - 0
src/jsonContainer.cpp

@@ -20,3 +20,19 @@ float JSonContainer::diff(const JSonElement *other) const
     return levenshteinPercent<JSonElement>(this, (const JSonContainer*)other);
 }
 
+// Deep equality: `other` must be a JSonContainer of the same size whose items
+// are pairwise equal by content (the same pointer trivially counts as equal).
+bool JSonContainer::operator==(const JSonElement *other) const
+{
+    const JSonContainer *rhs = dynamic_cast<const JSonContainer *>(other);
+    if (!rhs || size() != rhs->size())
+        return false;
+    const_iterator b = rhs->cbegin();
+    // Sizes match, so walking *this alone also exhausts rhs.
+    for (const_iterator a = cbegin(); a != cend(); ++a, ++b)
+        // Items are JSonElement*: dereference to reach the virtual operator==.
+        if (*a != *b && !(*a && *b && **a == *b))
+            return false;
+    return true;
+}
+

+ 5 - 0
src/jsonElement.cpp

@@ -114,3 +114,8 @@ float JSonElement::diff(const JSonElement *o) const
     return levenshteinPercent(stringify(), o->stringify());
 }
 
+bool JSonElement::operator==(const JSonElement *o) const
+{
+    return o && stringify() == o->stringify(); // null never matches; otherwise compare the stringify() output
+}
+

+ 7 - 0
src/jsonObjectEntry.cpp

@@ -22,6 +22,13 @@ bool JSonObjectEntry::operator==(const std::string &k) const
     return key == k;
 }
 
+// True when `o` is an entry with the same key whose value is the same object or equal by content.
+bool JSonObjectEntry::operator==(const JSonElement *o) const
+{
+    const JSonObjectEntry *rhs = dynamic_cast<const JSonObjectEntry*>(o);
+    return rhs && key == rhs->key && (value == rhs->value || (value && rhs->value && *value == rhs->value));
+}
+
 JSonElement *JSonObjectEntry::operator*()
 {
     return value;

+ 15 - 4
src/levenshtein.cpp

@@ -1,3 +1,4 @@
+#include <iostream>
 #include "levenshtein.hh"
 
 float levenshteinPercent(const std::string &a, const std::string &b)
@@ -6,10 +7,10 @@ float levenshteinPercent(const std::string &a, const std::string &b)
     const size_t lenB = b.size();
 
     if (lenA < UCHAR_MAX && lenB < UCHAR_MAX)
-        return _levenshteinPercent<unsigned char, std::string, char>(&a, &b, lenA, lenB);
+        return _levenshteinPercent<unsigned char, std::string::const_iterator, char>(a.begin(), a.end(), b.begin(), b.end(), lenA, lenB);
     if (lenA < USHRT_MAX && lenB < USHRT_MAX)
-        return _levenshteinPercent<unsigned short, std::string, char>(&a, &b, lenA, lenB);
-    return _levenshteinPercent<unsigned int, std::string, char>(&a, &b, lenA, lenB);
+        return _levenshteinPercent<unsigned short, std::string::const_iterator, char>(a.begin(), a.end(), b.begin(), b.end(), lenA, lenB);
+    return _levenshteinPercent<unsigned int, std::string::const_iterator, char>(a.begin(), a.end(), b.begin(), b.end(), lenA, lenB);
 }
 
 bool levenshteinCompare(const char &a, const char &b)
@@ -19,6 +20,16 @@ bool levenshteinCompare(const char &a, const char &b)
 
 bool levenshteinCompare(const JSonElement *a, const JSonElement *b)
 {
-    return a->diff(b) > .7f;
+    // Fuzzy match: true when a's diff() score against b exceeds LEVENSHTEIN_SENSIBILITY.
+    return a->diff(b) > LEVENSHTEIN_SENSIBILITY;
+}
+
+// Exact (non-fuzzy) equality of two characters.
+bool levenshteinStrictCompare(const char &a, const char &b) { return a == b; }
+
+// Exact equality of two elements: the same object, or both non-null and equal
+// through the virtual JSonElement::operator==.
+bool levenshteinStrictCompare(const JSonElement *a, const JSonElement *b)
+{
+    return a == b || (a && b && *a == b);
+}