| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- /**********
- This file is part of levenshpp.
- levenshpp is free library: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- levenshpp is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with levenshpp. If not, see <http://www.gnu.org/licenses/>.
- **********/
- #pragma once
- #include <algorithm>
- #include <utility>
- #include <set>
- template <typename T>
- class LevenshteinPotencial
- {
- public:
- LevenshteinPotencial(const T &px, const T &py, const T value): coords(std::pair<T, T>(px, py)), minValue(value)
- { }
- unsigned int getTaxiCab() const
- {
- return coords.first +coords.second;
- }
- bool operator<(const LevenshteinPotencial &o) const
- {
- if (minValue == o.minValue)
- return getTaxiCab() > o.getTaxiCab(); // The bigger the best
- return minValue < o.minValue;
- }
- bool operator==(const std::pair<T, T> &o) const
- {
- return coords.first == o.first && coords.second == o.second;
- }
- std::pair<T, T> coords;
- T minValue;
- };
- static int levenshtein_get(int **map, int px, int py)
- {
- if (px == -1 && py == -1)
- return 0;
- if (px == -1)
- return py +1;
- if (py == -1)
- return px +1;
- return map[px][py];
- }
- template <class T, typename SIZE=unsigned int>
- unsigned int levenshtein(const T &a, const T &b, const SIZE aSize, const SIZE bSize)
- {
- int **items = new int*[aSize]();
- std::multiset<LevenshteinPotencial<SIZE> > toProcess;
- for (SIZE i =0; i < aSize; i++)
- {
- items[i] = new int[bSize]();
- toProcess.insert(LevenshteinPotencial<SIZE>(0, i, i));
- for (SIZE j=0; j < bSize; j++)
- items[i][j] = -1;
- }
- for (SIZE i =1; i < bSize; i++)
- toProcess.insert(LevenshteinPotencial<SIZE>(i, 0, i));
- while (toProcess.size())
- {
- const auto currentIt = toProcess.cbegin();
- const LevenshteinPotencial<SIZE> ¤t = *(currentIt);
- int add = levenshtein_get(items, current.coords.first -1, current.coords.second);
- int rem = levenshtein_get(items, current.coords.first, current.coords.second -1);
- int mod = levenshtein_get(items, current.coords.first -1, current.coords.second -1);
- int min = -1;
- // Compute weight
- if (add != -1)
- min = add +1;
- if (rem != -1)
- min = min == -1 ? rem +1 : std::min(min, rem +1);
- if (mod != -1)
- {
- if (a[current.coords.first] != b[current.coords.second])
- mod++;
- min = min == -1 ? mod : std::min(min, mod);
- }
- items[current.coords.first][current.coords.second] = min;
- if (current.coords.first == aSize -1 && current.coords.second == bSize -1)
- break;
- //update toProcess
- add = rem = mod = -1;
- for (auto i = toProcess.cbegin(); i != toProcess.cend(); i++)
- {
- if (*i == std::pair<SIZE, SIZE>(current.coords.first, current.coords.second +1))
- {
- add = (*i).minValue;
- toProcess.erase(i);
- }
- else if (*i == std::pair<SIZE, SIZE>(current.coords.first +1, current.coords.second))
- {
- rem = (*i).minValue;
- toProcess.erase(i);
- }
- else if (*i == std::pair<SIZE, SIZE>(current.coords.first +1, current.coords.second +1))
- {
- mod = (*i).minValue;
- toProcess.erase(i);
- }
- }
- if (current.coords.second +1 < bSize && items[current.coords.first][current.coords.second +1] == -1)
- toProcess.insert(LevenshteinPotencial<SIZE>(current.coords.first, current.coords.second +1, add == -1 ? min +1 : std::min(min +1, add)));
- if (current.coords.first +1 < aSize && items[current.coords.first +1][current.coords.second] == -1)
- toProcess.insert(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second, rem == -1 ? min +1 : std::min(min +1, rem)));
- if (current.coords.first +1 < aSize && current.coords.second +1 < bSize &&
- items[current.coords.first +1][current.coords.second +1] == -1)
- toProcess.insert(LevenshteinPotencial<SIZE>(current.coords.first +1, current.coords.second +1, mod == -1 ? min : std::min(mod, min)));
- toProcess.erase(currentIt);
- }
- const unsigned int levenshtein = items[aSize -1][bSize -1];
- for (SIZE i =0; i < aSize; i++)
- delete[] items[i];
- delete[] items;
- return levenshtein;
- }
|