From 65138c5cd99a4f55a092a74014e48ce76f81ff27 Mon Sep 17 00:00:00 2001
From: ABW <andrzejl@syncad.com>
Date: Fri, 4 Sep 2020 19:54:19 +0200
Subject: [PATCH] [ABW]: some characters apparently need the long 8-hex-digit
 \U escape (problem actually caused by use of ujson)

---
 hive/utils/normalize.py | 26 +++++++++++++++-----------
 tests/tests_api         |  2 +-
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/hive/utils/normalize.py b/hive/utils/normalize.py
index 0894cd1df..0fe4f5d13 100644
--- a/hive/utils/normalize.py
+++ b/hive/utils/normalize.py
@@ -78,19 +78,23 @@ def escape_characters(text):
         if ch in SPECIAL_CHARS:
             dw = SPECIAL_CHARS[ch]
             ret = ret + dw
-        elif ch.isprintable():
-            ret = ret + ch
         else:
-            # escaped_value = ch.encode('unicode-escape').decode('utf-8')
             ordinal = ord(ch)
-            hexstr = hex(ordinal)[2:]
-            escaped_value = '\\u'
-            i = len(hexstr)
-            while i < 4:
-                escaped_value += '0'
-                i += 1
-            escaped_value += hexstr
-            ret = ret + escaped_value
+            if ordinal <= 0x80 and ch.isprintable():
+                ret = ret + ch
+            else:
+                hexstr = hex(ordinal)[2:]
+                i = len(hexstr)
+                max = 4
+                escaped_value = '\\u'
+                if i > max:
+                    max = 8
+                    escaped_value = '\\U'
+                while i < max:
+                    escaped_value += '0'
+                    i += 1
+                escaped_value += hexstr
+                ret = ret + escaped_value
 
     ret = ret + "'"
     return ret
diff --git a/tests/tests_api b/tests/tests_api
index 4ee51004b..fa660ef0e 160000
--- a/tests/tests_api
+++ b/tests/tests_api
@@ -1 +1 @@
-Subproject commit 4ee51004b4d83d2c12ca8f6e10faab762cc0262f
+Subproject commit fa660ef0ee019ba9c2da91e7a9140423593d944e
-- 
GitLab