From 8795479dbc68e80b26b61e1379165b856ef1bfc2 Mon Sep 17 00:00:00 2001
From: ABW <andrzejl@syncad.com>
Date: Thu, 3 Sep 2020 00:42:32 +0200
Subject: [PATCH] [ABW]: fixed unicode escaping

---
 hive/utils/normalize.py | 26 ++++++++++++++------------
 tests/tests_api         |  2 +-
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/hive/utils/normalize.py b/hive/utils/normalize.py
index b435cc0bd..0894cd1df 100644
--- a/hive/utils/normalize.py
+++ b/hive/utils/normalize.py
@@ -29,6 +29,7 @@ UNIT_NAI = {
 
 # convert special chars into their octal formats recognized by sql
 SPECIAL_CHARS = {
+    "\x00" : " ", # nul char cannot be stored in string column (ABW: if we ever find the need to store nul chars we'll need bytea, not text)
     "\r" : "\\015",
     "\n" : "\\012",
     "\v" : "\\013",
@@ -74,20 +75,21 @@ def escape_characters(text):
     ret = "E'"
 
     for ch in text:
-        if ch.isprintable() or ch in SPECIAL_CHARS:
-            try:
-                dw = SPECIAL_CHARS[ch]
-                ret = ret + dw
-            except KeyError:
-                ret = ret + ch
+        if ch in SPECIAL_CHARS:
+            dw = SPECIAL_CHARS[ch]
+            ret = ret + dw
+        elif ch.isprintable():
+            ret = ret + ch
         else:
+            # escaped_value = ch.encode('unicode-escape').decode('utf-8')
             ordinal = ord(ch)
-            if ordinal == 0 or ordinal >= 0x80:
-                escaped_value = 'u' + hex(ordinal)[2:]
-#                logging.info("Encoded unicode escape: {}".format(escaped_value))
-            else:
-                escaped_value = ch.encode('unicode-escape').decode('utf-8')
-
+            hexstr = hex(ordinal)[2:]
+            escaped_value = '\\u'
+            i = len(hexstr)
+            while i < 4:
+                escaped_value += '0'
+                i += 1
+            escaped_value += hexstr
             ret = ret + escaped_value
 
     ret = ret + "'"
diff --git a/tests/tests_api b/tests/tests_api
index c673b555a..7d925b4e8 160000
--- a/tests/tests_api
+++ b/tests/tests_api
@@ -1 +1 @@
-Subproject commit c673b555aa055358e0f5a1e1401a4110f7f83ca3
+Subproject commit 7d925b4e88faafd6d0154725bfe26d7bdfaee23f
-- 
GitLab