浏览代码

Merge branch 'devel'

B Thibault 9 年之前
父节点
当前提交
9f9e6fe687
共有 9 个文件被更改,包括 167 次插入61 次删除
  1. 1 0
      .gitignore
  2. 24 0
      CMakeLists.txt
  3. 2 1
      include/streamConsumer.hh
  4. 14 0
      include/unicode.hpp
  5. 2 4
      src/main.cpp
  6. 46 54
      src/streamConsumer.cpp
  7. 15 0
      src/unicode.cpp
  8. 61 0
      test/src/unicode.cpp
  9. 2 2
      test/testUnicode.json

+ 1 - 0
.gitignore

@@ -40,6 +40,7 @@ vgcore.*
 /test/wrapped_test
 /test/wrapped_test
 /test/optional_test
 /test/optional_test
 /test/levenshtein_test
 /test/levenshtein_test
+/test/unicode_test
 /cmake_install.cmake
 /cmake_install.cmake
 /Makefile
 /Makefile
 .fuse_hidden*
 .fuse_hidden*

+ 24 - 0
CMakeLists.txt

@@ -21,6 +21,7 @@ add_executable(jsonstroll
     src/linearHistory.cpp
     src/linearHistory.cpp
     src/outputFlag.cpp
     src/outputFlag.cpp
     src/streamConsumer.cpp
     src/streamConsumer.cpp
+    src/unicode.cpp
     src/searchPattern.cpp
     src/searchPattern.cpp
     src/inputSequence.cpp
     src/inputSequence.cpp
     src/levenshtein.cpp
     src/levenshtein.cpp
@@ -51,6 +52,7 @@ add_executable(json_test
     src/searchPattern.cpp
     src/searchPattern.cpp
     src/levenshtein.cpp
     src/levenshtein.cpp
     src/streamConsumer.cpp
     src/streamConsumer.cpp
+    src/unicode.cpp
     src/jsonArray.cpp
     src/jsonArray.cpp
     src/jsonObjectEntry.cpp
     src/jsonObjectEntry.cpp
     src/jsonObject.cpp
     src/jsonObject.cpp
@@ -82,6 +84,28 @@ set_property(
     PROPERTY RUNTIME_OUTPUT_DIRECTORY test
     PROPERTY RUNTIME_OUTPUT_DIRECTORY test
     )
     )
 
 
+# Test executable "unicode_test": test/src/unicode.cpp compiled together with
+# src/unicode.cpp and the other project sources listed below.
+add_executable(unicode_test
+    src/unicode.cpp
+    src/jsonException.cpp
+    src/streamConsumer.cpp
+    src/jsonArray.cpp
+    src/jsonContainer.cpp
+    src/jsonObject.cpp
+    src/jsonObjectEntry.cpp
+    src/jsonElement.cpp
+    src/jsonPrimitive.cpp
+    src/linearHistory.cpp
+    src/searchPattern.cpp
+    src/warning.cpp
+
+    test/src/unicode.cpp
+    )
+# Register the test under the name unicode_test; its command is test/unicode_test.
+add_test(unicode_test test/unicode_test)
+# Emit the binary into the test directory, which is where add_test looks for it.
+set_property(
+    TARGET unicode_test
+    PROPERTY RUNTIME_OUTPUT_DIRECTORY test
+    )
+
 add_executable(levenshtein_test
 add_executable(levenshtein_test
     src/levenshtein.cpp
     src/levenshtein.cpp
     src/jsonContainer.cpp
     src/jsonContainer.cpp

+ 2 - 1
include/streamConsumer.hh

@@ -52,7 +52,7 @@ class StreamConsumer
         static std::string extractUnicode(const char *);
         static std::string extractUnicode(const char *);
         static std::string extractUnicode(const std::string &);
         static std::string extractUnicode(const std::string &);
 
 
-    private:
+    protected:
         /**
         /**
          * @return non-null on successfully read JSonElement, or null if token (',', '[', ...)
          * @return non-null on successfully read JSonElement, or null if token (',', '[', ...)
         **/
         **/
@@ -61,6 +61,7 @@ class StreamConsumer
         JSonElement *consumeBool(JSonContainer *parent, std::stringstream &buf, char c);
         JSonElement *consumeBool(JSonContainer *parent, std::stringstream &buf, char c);
         JSonElement *consumeNumber(JSonContainer *parent, std::stringstream &buf, char c);
         JSonElement *consumeNumber(JSonContainer *parent, std::stringstream &buf, char c);
         JSonElement *consumeNull(JSonContainer *parent, std::stringstream &buf);
         JSonElement *consumeNull(JSonContainer *parent, std::stringstream &buf);
+        bool consumeEscapedChar(char c, std::stringstream &buf);
 
 
         /**
         /**
          * read next item, fill object or array if found
          * read next item, fill object or array if found

+ 14 - 0
include/unicode.hpp

@@ -0,0 +1,14 @@
+#pragma once
+
+unsigned char hexbyte(const char c);
+
+/**
+ * Fold the first len characters of str, read as hexadecimal digits with the
+ * most significant digit first, into a value of type T.
+ * Each character is decoded by hexbyte(char); anything that call throws
+ * propagates to the caller.
+ * @return the accumulated value, 0 when len is 0
+**/
+template<typename T>
+static T hexbyte(const char str[], unsigned int len)
+{
+    T value = 0;
+    const char *const end = str + len;
+
+    for (const char *digit = str; digit != end; ++digit)
+        value = (value << 4) + hexbyte(*digit);
+    return value;
+}
+
+

+ 2 - 4
src/main.cpp

@@ -40,8 +40,6 @@ void runDiff(const Params &params)
     std::deque<Warning> warns;
     std::deque<Warning> warns;
     std::deque<std::string> inputNames;
     std::deque<std::string> inputNames;
 
 
-    if (!params.isIgnoringUnicode())
-        setlocale(LC_ALL, "");
     for (std::pair<std::string, std::basic_istream<char>*> input : inputs)
     for (std::pair<std::string, std::basic_istream<char>*> input : inputs)
     {
     {
         StreamConsumer *stream;
         StreamConsumer *stream;
@@ -92,8 +90,6 @@ void run(const Params &params)
     CurseSimpleOutput *out = new CurseSimpleOutput(params);
     CurseSimpleOutput *out = new CurseSimpleOutput(params);
     std::list<Warning> warns;
     std::list<Warning> warns;
 
 
-    if (!params.isIgnoringUnicode())
-        setlocale(LC_ALL, "");
     for (std::pair<std::string, std::basic_istream<char>*> input : inputs)
     for (std::pair<std::string, std::basic_istream<char>*> input : inputs)
     {
     {
         StreamConsumer *stream;
         StreamConsumer *stream;
@@ -152,6 +148,8 @@ int main(int ac, char **av)
 
 
     if (_run)
     if (_run)
     {
     {
+        if (!params->isIgnoringUnicode())
+            setlocale(LC_ALL, "");
         if (params->isDiff())
         if (params->isDiff())
             runDiff(*params);
             runDiff(*params);
         else
         else

+ 46 - 54
src/streamConsumer.cpp

@@ -6,8 +6,11 @@
 
 
 #include <iostream>
 #include <iostream>
 #include <sstream>
 #include <sstream>
+#include <codecvt>
+#include <locale>
 #include "jsonElement.hh"
 #include "jsonElement.hh"
 #include "streamConsumer.hh"
 #include "streamConsumer.hh"
+#include "unicode.hpp"
 
 
 StreamConsumer::StreamConsumer(std::istream &s): stream(s), root(nullptr)
 StreamConsumer::StreamConsumer(std::istream &s): stream(s), root(nullptr)
 { }
 { }
@@ -154,37 +157,10 @@ JSonElement *StreamConsumer::consumeString(JSonContainer *parent, std::stringstr
         }
         }
         else
         else
         {
         {
-            if (c == '\\' || c == '"')
-                buf.write("\"", 1);
-            else if (c == 'u')
-            {
-                if (params && params->isIgnoringUnicode())
-                    buf.write("\\u", 2);
-                else
-                {
-                    char unicodeBuf[4];
-                    stream.read(unicodeBuf, 4);
-                    std::streamsize gcount = stream.gcount();
-                    history.put(unicodeBuf, gcount);
-                    if (gcount != 4)
-                        break;
-                    try {
-                        appendUnicode(unicodeBuf, buf);
-                    }
-                    catch (std::invalid_argument &e)
-                    {
-                        throw JsonHexvalueException(e.what(), stream.tellg(), history);
-                    }
-                }
-            }
-            else if (params && params->isStrict())
-                throw JsonEscapedException(c, stream.tellg(), history);
+            if (consumeEscapedChar(c, buf))
+                escaped = false;
             else
             else
-            {
-                buf.write("\\", 1).write(&c, 1);
-                warnings.push_back(Warning(JsonEscapedException(c, stream.tellg(), history)));
-            }
-            escaped = false;
+                break;
         }
         }
     }
     }
     buf.str("");
     buf.str("");
@@ -192,6 +168,43 @@ JSonElement *StreamConsumer::consumeString(JSonContainer *parent, std::stringstr
     return nullptr;
     return nullptr;
 }
 }
 
 
+/**
+ * Handle the character c that follows a backslash inside a string.
+ *  - '\\', '"' and '/' are written to buf as the character itself
+ *  - 'b', 'f', 'r', 'n' and 't' are written to buf as backslash + character
+ *  - 'u' is written as the two characters "\u" when params asks to ignore
+ *    unicode; otherwise four more characters are read from the stream, logged
+ *    to history and passed to appendUnicode
+ *  - any other character throws JsonEscapedException when params is strict,
+ *    else it is written as backslash + character and a Warning is recorded
+ *
+ * @return false only when fewer than four characters could be read after 'u',
+ *         true in every other non-throwing case
+ * @throws JsonHexvalueException when appendUnicode throws std::invalid_argument
+ * @throws JsonEscapedException on an unlisted escape when params is strict
+**/
+bool StreamConsumer::consumeEscapedChar(char c, std::stringstream &buf)
+{
+    switch (c)
+    {
+        case '\\':
+        case '"':
+        case '/':
+            buf.write(&c, 1);
+            return true;
+
+        case 'b':
+        case 'f':
+        case 'r':
+        case 'n':
+        case 't':
+            buf.write("\\", 1).write(&c, 1);
+            return true;
+
+        case 'u':
+        {
+            if (params && params->isIgnoringUnicode())
+            {
+                buf.write("\\u", 2);
+                return true;
+            }
+
+            char hexDigits[4];
+            stream.read(hexDigits, 4);
+            const std::streamsize nbRead = stream.gcount();
+            // Log whatever was actually read, even when the read fell short
+            history.put(hexDigits, nbRead);
+            if (nbRead != 4)
+                return false;
+            try {
+                appendUnicode(hexDigits, buf);
+            }
+            catch (std::invalid_argument &e)
+            {
+                throw JsonHexvalueException(e.what(), stream.tellg(), history);
+            }
+            return true;
+        }
+
+        default:
+            break;
+    }
+
+    // Unlisted escape: fatal when strict, otherwise kept verbatim with a warning
+    if (params && params->isStrict())
+        throw JsonEscapedException(c, stream.tellg(), history);
+    buf.write("\\", 1).write(&c, 1);
+    warnings.push_back(Warning(JsonEscapedException(c, stream.tellg(), history)));
+    return true;
+}
+
 JSonElement *StreamConsumer::consumeBool(JSonContainer *parent, std::stringstream &buf, char firstChar)
 JSonElement *StreamConsumer::consumeBool(JSonContainer *parent, std::stringstream &buf, char firstChar)
 {
 {
     size_t read =1;
     size_t read =1;
@@ -335,33 +348,12 @@ JSonElement *StreamConsumer::consumeToken(JSonContainer *parent, std::stringstre
     return nullptr;
     return nullptr;
 }
 }
 
 
-static unsigned char hexbyte(const char c)
-{
-    if (c >= '0' && c <= '9')
-        return c - '0';
-    if (c >= 'A' && c <= 'F')
-        return c - 'A' + 10;
-    if (c >= 'a' && c <= 'f')
-        return c - 'a' + 10;
-    throw std::invalid_argument(JsonHexvalueException::msg(c));
-}
-
-template<typename T>
-static T hexbyte(const char str[], unsigned int len)
-{
-    T result = 0;
-    for (unsigned int i =0; i < len; ++i)
-        result = (result << 4) + hexbyte(str[i]);
-    return result;
-}
-
 void StreamConsumer::appendUnicode(const char unicode[4], std::stringstream &buf)
 void StreamConsumer::appendUnicode(const char unicode[4], std::stringstream &buf)
 {
 {
     unsigned short uni = hexbyte<unsigned short>(unicode, 4);
     unsigned short uni = hexbyte<unsigned short>(unicode, 4);
-    char test[5];
-    bzero(test, sizeof(*test) *5);
-    snprintf(test, 4, "%lc", uni);
-    buf.write(test, 3);
+    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
+    std::string unichar = converter.to_bytes(uni);
+    buf.write(unichar.c_str(), unichar.size());
 }
 }
 
 
 std::string StreamConsumer::extractUnicode(const char *buf)
 std::string StreamConsumer::extractUnicode(const char *buf)

+ 15 - 0
src/unicode.cpp

@@ -0,0 +1,15 @@
+#include <stdexcept>
+#include "unicode.hpp"
+#include "jsonException.hh"
+
+/**
+ * Convert one hexadecimal digit character to its numeric value.
+ * @param c one of '0'-'9', 'A'-'F' or 'a'-'f'
+ * @return the value of c, between 0 and 15
+ * @throws std::invalid_argument built from JsonHexvalueException::msg(c) for any other character
+**/
+unsigned char hexbyte(const char c)
+{
+    const bool isDecimal = ('0' <= c && c <= '9');
+    const bool isUpperHex = ('A' <= c && c <= 'F');
+    const bool isLowerHex = ('a' <= c && c <= 'f');
+
+    if (!isDecimal && !isUpperHex && !isLowerHex)
+        throw std::invalid_argument(JsonHexvalueException::msg(c));
+    if (isDecimal)
+        return c - '0';
+    // Letters: 'A'/'a' stand for 10
+    return c - (isUpperHex ? 'A' : 'a') + 10;
+}
+

+ 61 - 0
test/src/unicode.cpp

@@ -0,0 +1,61 @@
+#include <sstream>
+#include <iostream>
+#include "unicode.hpp"
+#include "streamConsumer.hh"
+
+// Print "<file>:<line>: failed asserting <got> <op> expected <expt>" on stdout,
+// then make the enclosing function return false.
+#define FAILED(got, op, expt) {std::cout << __FILE__ << ":" << __LINE__ << ": failed asserting " << got << " " << op << " expected " << expt << std::endl; return false; }
+
+/**
+ * Test helper deriving from StreamConsumer; its static member functions call
+ * StreamConsumer's appendUnicode.
+**/
+class StreamConsumerTester: public StreamConsumer
+{
+    public:
+        /**
+         * @param unicode four hexadecimal digits
+         * @return the bytes appendUnicode wrote for these digits
+        **/
+        static const std::string getStringFromUnicode(const char unicode[4])
+        {
+            std::stringstream ss;
+            appendUnicode(unicode, ss);
+            return ss.str();
+        }
+
+        /**
+         * Check that the digits "00e8" decode to the string "è".
+         * NOTE(review): assumes this source file is stored as UTF-8, so that
+         * the literal below is the UTF-8 form of U+00E8 -- confirm.
+         * @return true on success, false (after reporting through FAILED) otherwise
+        **/
+        static bool test()
+        {
+            const std::string expected = "è";
+            const std::string s = getStringFromUnicode("00e8");
+
+            // Report the two strings that were actually compared, not the
+            // first byte against a wide-character code
+            if (s != expected)
+                FAILED(s, "!=", expected);
+            return true;
+        }
+};
+
+/**
+ * Check hexbyte<unsigned short>() against known hexadecimal strings.
+ * Each failure report re-evaluates exactly the expression that was checked,
+ * so the printed value is the one that failed.
+ * @return true when every check passes, false (after reporting through FAILED)
+ *         on the first mismatch
+**/
+bool simpleTest()
+{
+    if (hexbyte<unsigned short>("0020", 4) != 32)
+        FAILED(hexbyte<unsigned short>("0020", 4), "!=", 32);
+
+    if (hexbyte<unsigned short>("20", 2) != 32)
+        FAILED(hexbyte<unsigned short>("20", 2), "!=", 32);
+
+    if (hexbyte<unsigned short>("2020", 4) != 8224)
+        FAILED(hexbyte<unsigned short>("2020", 4), "!=", 8224);
+
+    if (hexbyte<unsigned short>("FFFF", 4) != 65535)
+        FAILED(hexbyte<unsigned short>("FFFF", 4), "!=", 65535);
+
+    if (hexbyte<unsigned short>("0000", 4) != 0)
+        FAILED(hexbyte<unsigned short>("0000", 4), "!=", 0);
+
+    if (hexbyte<unsigned short>("", 0) != 0)
+        FAILED(hexbyte<unsigned short>("", 0), "!=", 0);
+
+    // Six digits into a 16-bit type: the two leading zeros must not change the result
+    if (hexbyte<unsigned short>("002020", 6) != 8224)
+        FAILED(hexbyte<unsigned short>("002020", 6), "!=", 8224);
+
+    return true;
+}
+
+/**
+ * Run simpleTest() and then StreamConsumerTester::test(); the second is not
+ * run when the first fails.
+ * Exits with EXIT_SUCCESS when both return true, EXIT_FAILURE otherwise.
+**/
+int main()
+{
+    const bool allPassed = simpleTest() && StreamConsumerTester::test();
+
+    exit(allPassed ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+

+ 2 - 2
test/testUnicode.json

@@ -1,9 +1,9 @@
 [
 [
-    "_\u20ac_\u20ac_",
+    "_\u20ac_\u20ac_ is _euro_euro_",
     "Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa",
     "Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa",
     "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!",
     "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!",
     "ascii-only",
     "ascii-only",
-    "\u058e",
+    "\u058e is 'LEFT-FACING ARMENIAN ETERNITY SIGN'",
     {
     {
         "eurooooooooooooooooooooooooooo": "\u20ac coucou"
         "eurooooooooooooooooooooooooooo": "\u20ac coucou"
     }
     }