Browse Source

Merge branch 'devel'

B Thibault 9 years ago
parent
commit
2e569df017
9 changed files with 167 additions and 61 deletions
  1. 1 0
      .gitignore
  2. 24 0
      CMakeLists.txt
  3. 2 1
      include/streamConsumer.hh
  4. 14 0
      include/unicode.hpp
  5. 2 4
      src/main.cpp
  6. 46 54
      src/streamConsumer.cpp
  7. 15 0
      src/unicode.cpp
  8. 61 0
      test/src/unicode.cpp
  9. 2 2
      test/testUnicode.json

+ 1 - 0
.gitignore

@@ -40,6 +40,7 @@ vgcore.*
 /test/wrapped_test
 /test/optional_test
 /test/levenshtein_test
+/test/unicode_test
 /cmake_install.cmake
 /Makefile
 .fuse_hidden*

+ 24 - 0
CMakeLists.txt

@@ -21,6 +21,7 @@ add_executable(jsonstroll
     src/linearHistory.cpp
     src/outputFlag.cpp
     src/streamConsumer.cpp
+    src/unicode.cpp
     src/searchPattern.cpp
     src/inputSequence.cpp
     src/levenshtein.cpp
@@ -51,6 +52,7 @@ add_executable(json_test
     src/searchPattern.cpp
     src/levenshtein.cpp
     src/streamConsumer.cpp
+    src/unicode.cpp
     src/jsonArray.cpp
     src/jsonObjectEntry.cpp
     src/jsonObject.cpp
@@ -82,6 +84,28 @@ set_property(
     PROPERTY RUNTIME_OUTPUT_DIRECTORY test
     )
 
+# Test executable for the hex/unicode helpers: the listed src/ sources
+# plus the test driver test/src/unicode.cpp.
+add_executable(unicode_test
+    src/unicode.cpp
+    src/jsonException.cpp
+    src/streamConsumer.cpp
+    src/jsonArray.cpp
+    src/jsonContainer.cpp
+    src/jsonObject.cpp
+    src/jsonObjectEntry.cpp
+    src/jsonElement.cpp
+    src/jsonPrimitive.cpp
+    src/linearHistory.cpp
+    src/searchPattern.cpp
+    src/warning.cpp
+
+    test/src/unicode.cpp
+    )
+# Register the binary as a test named unicode_test.
+add_test(unicode_test test/unicode_test)
+# Emit the binary into the test directory, matching the path given to add_test.
+set_property(
+    TARGET unicode_test
+    PROPERTY RUNTIME_OUTPUT_DIRECTORY test
+    )
+
+
 add_executable(levenshtein_test
     src/levenshtein.cpp
     src/jsonContainer.cpp

+ 2 - 1
include/streamConsumer.hh

@@ -52,7 +52,7 @@ class StreamConsumer
         static std::string extractUnicode(const char *);
         static std::string extractUnicode(const std::string &);
 
-    private:
+    protected:
         /**
          * @return non-null on successfully read JSonElement, or null if token (',', '[', ...)
         **/
@@ -61,6 +61,7 @@ class StreamConsumer
         JSonElement *consumeBool(JSonContainer *parent, std::stringstream &buf, char c);
         JSonElement *consumeNumber(JSonContainer *parent, std::stringstream &buf, char c);
         JSonElement *consumeNull(JSonContainer *parent, std::stringstream &buf);
+        bool consumeEscapedChar(char c, std::stringstream &buf);
 
         /**
          * read next item, fill object or array if found

+ 14 - 0
include/unicode.hpp

@@ -0,0 +1,14 @@
+#pragma once
+
+/**
+ * Convert one hexadecimal digit to its numeric value.
+**/
+unsigned char hexbyte(const char c);
+
+/**
+ * Fold the first len characters of str, read as hexadecimal digits with the
+ * most significant digit first, into a value of type T.
+ * Every character is converted by hexbyte(char); len == 0 yields 0.
+**/
+template<typename T>
+static T hexbyte(const char str[], unsigned int len)
+{
+    T value = 0;
+    const char *cursor = str;
+    const char *const stop = str + len;
+
+    while (cursor != stop)
+    {
+        // Make room for the next nibble, then add it.
+        value = (value << 4) + hexbyte(*cursor);
+        ++cursor;
+    }
+    return value;
+}
+
+

+ 2 - 4
src/main.cpp

@@ -40,8 +40,6 @@ void runDiff(const Params &params)
     std::deque<Warning> warns;
     std::deque<std::string> inputNames;
 
-    if (!params.isIgnoringUnicode())
-        setlocale(LC_ALL, "");
     for (std::pair<std::string, std::basic_istream<char>*> input : inputs)
     {
         StreamConsumer *stream;
@@ -92,8 +90,6 @@ void run(const Params &params)
     CurseSimpleOutput *out = new CurseSimpleOutput(params);
     std::list<Warning> warns;
 
-    if (!params.isIgnoringUnicode())
-        setlocale(LC_ALL, "");
     for (std::pair<std::string, std::basic_istream<char>*> input : inputs)
     {
         StreamConsumer *stream;
@@ -152,6 +148,8 @@ int main(int ac, char **av)
 
     if (_run)
     {
+        if (!params->isIgnoringUnicode())
+            setlocale(LC_ALL, "");
         if (params->isDiff())
             runDiff(*params);
         else

+ 46 - 54
src/streamConsumer.cpp

@@ -6,8 +6,11 @@
 
 #include <iostream>
 #include <sstream>
+#include <codecvt>
+#include <locale>
 #include "jsonElement.hh"
 #include "streamConsumer.hh"
+#include "unicode.hpp"
 
 StreamConsumer::StreamConsumer(std::istream &s): stream(s), root(nullptr)
 { }
@@ -154,37 +157,10 @@ JSonElement *StreamConsumer::consumeString(JSonContainer *parent, std::stringstr
         }
         else
         {
-            if (c == '\\' || c == '"')
-                buf.write("\"", 1);
-            else if (c == 'u')
-            {
-                if (params && params->isIgnoringUnicode())
-                    buf.write("\\u", 2);
-                else
-                {
-                    char unicodeBuf[4];
-                    stream.read(unicodeBuf, 4);
-                    std::streamsize gcount = stream.gcount();
-                    history.put(unicodeBuf, gcount);
-                    if (gcount != 4)
-                        break;
-                    try {
-                        appendUnicode(unicodeBuf, buf);
-                    }
-                    catch (std::invalid_argument &e)
-                    {
-                        throw JsonHexvalueException(e.what(), stream.tellg(), history);
-                    }
-                }
-            }
-            else if (params && params->isStrict())
-                throw JsonEscapedException(c, stream.tellg(), history);
+            if (consumeEscapedChar(c, buf))
+                escaped = false;
             else
-            {
-                buf.write("\\", 1).write(&c, 1);
-                warnings.push_back(Warning(JsonEscapedException(c, stream.tellg(), history)));
-            }
-            escaped = false;
+                break;
         }
     }
     buf.str("");
@@ -192,6 +168,43 @@ JSonElement *StreamConsumer::consumeString(JSonContainer *parent, std::stringstr
     return nullptr;
 }
 
+/**
+ * Handle the character that follows a backslash inside a JSON string.
+ * @param c character read right after the backslash
+ * @param buf string buffer receiving the decoded (or preserved) output
+ * @return false when fewer than four characters could be read after 'u',
+ *         true in every other non-throwing case
+ * @throws JsonHexvalueException when appendUnicode rejects the hex digits
+ * @throws JsonEscapedException on an unknown escape while params is strict
+**/
+bool StreamConsumer::consumeEscapedChar(char c, std::stringstream &buf)
+{
+    switch (c)
+    {
+        case '\\':
+        case '"':
+        case '/':
+            // These escapes stand for the character itself.
+            buf.write(&c, 1);
+            return true;
+
+        case 'b':
+        case 'f':
+        case 'r':
+        case 'n':
+        case 't':
+            // Control escapes are kept in their escaped form.
+            buf.write("\\", 1).write(&c, 1);
+            return true;
+
+        case 'u':
+            break;
+
+        default:
+            if (params && params->isStrict())
+                throw JsonEscapedException(c, stream.tellg(), history);
+            // Lenient mode: keep the sequence verbatim and record a warning.
+            buf.write("\\", 1).write(&c, 1);
+            warnings.push_back(Warning(JsonEscapedException(c, stream.tellg(), history)));
+            return true;
+    }
+
+    // Unicode escape from here on.
+    if (params && params->isIgnoringUnicode())
+    {
+        // Leave the escape untouched; only its prefix is written here.
+        buf.write("\\u", 2);
+        return true;
+    }
+
+    char hexDigits[4];
+    stream.read(hexDigits, 4);
+    const std::streamsize got = stream.gcount();
+    // Whatever was read is recorded in history, even when it is incomplete.
+    history.put(hexDigits, got);
+    if (got != 4)
+        return false;
+
+    try {
+        appendUnicode(hexDigits, buf);
+    }
+    catch (std::invalid_argument &e)
+    {
+        throw JsonHexvalueException(e.what(), stream.tellg(), history);
+    }
+    return true;
+}
+
 JSonElement *StreamConsumer::consumeBool(JSonContainer *parent, std::stringstream &buf, char firstChar)
 {
     size_t read =1;
@@ -335,33 +348,12 @@ JSonElement *StreamConsumer::consumeToken(JSonContainer *parent, std::stringstre
     return nullptr;
 }
 
-static unsigned char hexbyte(const char c)
-{
-    if (c >= '0' && c <= '9')
-        return c - '0';
-    if (c >= 'A' && c <= 'F')
-        return c - 'A' + 10;
-    if (c >= 'a' && c <= 'f')
-        return c - 'a' + 10;
-    throw std::invalid_argument(JsonHexvalueException::msg(c));
-}
-
-template<typename T>
-static T hexbyte(const char str[], unsigned int len)
-{
-    T result = 0;
-    for (unsigned int i =0; i < len; ++i)
-        result = (result << 4) + hexbyte(str[i]);
-    return result;
-}
-
 void StreamConsumer::appendUnicode(const char unicode[4], std::stringstream &buf)
 {
+    // Parse the four hex digits of a unicode escape into one 16-bit value.
     unsigned short uni = hexbyte<unsigned short>(unicode, 4);
-    char test[5];
-    bzero(test, sizeof(*test) *5);
-    snprintf(test, 4, "%lc", uni);
-    buf.write(test, 3);
+    // Encode that value as UTF-8 and append exactly the bytes produced.
+    // NOTE(review): only one 16-bit unit is converted per call, so the two
+    // halves of a surrogate pair would be encoded separately - confirm.
+    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
+    std::string unichar = converter.to_bytes(uni);
+    buf.write(unichar.c_str(), unichar.size());
 }
 
 std::string StreamConsumer::extractUnicode(const char *buf)

+ 15 - 0
src/unicode.cpp

@@ -0,0 +1,15 @@
+#include <stdexcept>
+#include "unicode.hpp"
+#include "jsonException.hh"
+
+/**
+ * Map one hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its value, 0 to 15.
+ * @throws std::invalid_argument built from JsonHexvalueException::msg(c)
+ *         for any other character
+**/
+unsigned char hexbyte(const char c)
+{
+    unsigned char digit;
+
+    if ('0' <= c && c <= '9')
+        digit = c - '0';
+    else if ('A' <= c && c <= 'F')
+        digit = c - 'A' + 10;
+    else if ('a' <= c && c <= 'f')
+        digit = c - 'a' + 10;
+    else
+        throw std::invalid_argument(JsonHexvalueException::msg(c));
+    return digit;
+}
+

+ 61 - 0
test/src/unicode.cpp

@@ -0,0 +1,61 @@
+#include <sstream>
+#include <iostream>
+#include "unicode.hpp"
+#include "streamConsumer.hh"
+
+#define FAILED(got, op, expt) {std::cout << __FILE__ << ":" << __LINE__ << ": failed asserting " << got << " " << op << " expected " << expt << std::endl; return false; }
+
+/**
+ * Subclass used by the test to call StreamConsumer::appendUnicode.
+**/
+class StreamConsumerTester: public StreamConsumer
+{
+    public:
+        /**
+         * Run appendUnicode on four hex digits and return the bytes it wrote.
+        **/
+        static const std::string getStringFromUnicode(const char unicode[4])
+        {
+            std::stringstream ss;
+            appendUnicode(unicode, ss);
+            return ss.str();
+        }
+
+        /**
+         * U+00E8 must be emitted as the UTF-8 sequence 0xC3 0xA8.
+         * The expected bytes are spelled out so the check does not depend on
+         * the encoding this source file is compiled with.
+         * @return true on success, false (after printing) on mismatch
+        **/
+        static bool test()
+        {
+            const std::string expected = "\xc3\xa8";
+            const std::string s = getStringFromUnicode("00e8");
+
+            // Report both full strings: the first byte alone is not
+            // comparable to a code point.
+            if (s != expected)
+                FAILED(s, "!=", expected);
+            return true;
+        }
+};
+
+/**
+ * Check hexbyte<unsigned short> on fixed inputs: padded and unpadded digits,
+ * the largest 16-bit value, zero, an empty range, and a 6-digit input whose
+ * two leading zeros leave the 16-bit result unchanged.
+ * Each failure message re-evaluates exactly the expression that was tested.
+ * @return true when every check passes, false after the first failure
+**/
+bool simpleTest()
+{
+    if (hexbyte<unsigned short>("0020", 4) != 32)
+        FAILED(hexbyte<unsigned short>("0020", 4), "!=", 32);
+
+    if (hexbyte<unsigned short>("20", 2) != 32)
+        FAILED(hexbyte<unsigned short>("20", 2), "!=", 32);
+
+    if (hexbyte<unsigned short>("2020", 4) != 8224)
+        FAILED(hexbyte<unsigned short>("2020", 4), "!=", 8224);
+
+    if (hexbyte<unsigned short>("FFFF", 4) != 65535)
+        FAILED(hexbyte<unsigned short>("FFFF", 4), "!=", 65535);
+
+    if (hexbyte<unsigned short>("0000", 4) != 0)
+        FAILED(hexbyte<unsigned short>("0000", 4), "!=", 0);
+
+    if (hexbyte<unsigned short>("", 0) != 0)
+        FAILED(hexbyte<unsigned short>("", 0), "!=", 0);
+
+    // The message must use the same 6-character literal: reading 6
+    // characters from "2020" would run past the end of that literal.
+    if (hexbyte<unsigned short>("002020", 6) != 8224)
+        FAILED(hexbyte<unsigned short>("002020", 6), "!=", 8224);
+
+    return true;
+}
+
+/**
+ * Test driver: runs the hexbyte checks, then the appendUnicode check.
+ * The second check is skipped when the first one fails.
+ * Exits with EXIT_SUCCESS only when both pass.
+**/
+int main()
+{
+    const bool allPassed = simpleTest() && StreamConsumerTester::test();
+
+    if (allPassed)
+        exit(EXIT_SUCCESS);
+    exit(EXIT_FAILURE);
+}
+

+ 2 - 2
test/testUnicode.json

@@ -1,9 +1,9 @@
 [
-    "_\u20ac_\u20ac_",
+    "_\u20ac_\u20ac_ is _euro_euro_",
     "Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa",
     "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!",
     "ascii-only",
-    "\u058e",
+    "\u058e is 'LEFT-FACING ARMENIAN ETERNITY SIGN'",
     {
         "eurooooooooooooooooooooooooooo": "\u20ac coucou"
     }