strconv, golang_str: Switch quote, unquote and qq to always return bstr

bstr is becoming the default pygolang string type, and it can be freely mixed with bytes, unicode and ustr. Previously, e.g., strconv.quote checked the type of its input and tried to return a result of the same type. This is no longer necessary, since bstr is intended to be used universally and to interoperate with all other string types.

strconv, golang_str: Switch quote, unquote and qq to always return bstr
bstr is becoming the default pygolang string type, and it can be freely mixed with bytes, unicode and ustr. Previously, e.g., strconv.quote checked the type of its input and tried to return a result of the same type. This is no longer necessary, since bstr is intended to be used universally and to interoperate with all other string types.
604a7765 · Kirill Smelkov · bbbb58f0 · 604a7765 · 604a7765 · 604a7765
Commit 604a7765 authored Oct 07, 2022 by Kirill Smelkov
Showing with 45 additions and 71 deletions

golang/_golang_str.pyx golang/_golang_str.pyx +2 -13

golang/golang_str_test.py golang/golang_str_test.py +16 -9

golang/strconv.py golang/strconv.py +9 -26

golang/strconv_test.py golang/strconv_test.py +18 -23

No files found.
--- a/golang/_golang_str.pyx
+++ b/golang/_golang_str.pyx
@@ -549,19 +549,8 @@ def pyqq(obj):
    # py2: unicode | str
    # py3: str     | bytes
    if not isinstance(obj, (unicode, bytes)):
-        obj = str(obj)
+        obj = _bstringify(obj)
+    return pystrconv.quote(obj)
-    qobj = pystrconv.quote(obj)
-    # `printf('%s', qq(obj))` should work. For this make sure qobj is always
-    # a-la str type (unicode on py3, bytes on py2), that can be transparently
-    # converted to unicode or bytes as needed.
-    if PY_MAJOR_VERSION >= 3:
-        qobj = pyu(qobj)
-    else:
-        qobj = pyb(qobj)
-    return qobj

--- a/golang/golang_str_test.py
+++ b/golang/golang_str_test.py
@@ -672,15 +672,22 @@ def test_strings_subclasses(tx):
 def test_qq():
    # NOTE qq is also tested as part of strconv.quote
-    # qq(any) returns string type
+    # qq(any) -> bstr
-    assert isinstance(qq(b('мир')), str)    # qq(b) -> str (bytes·py2, unicode·py3)
+    def _(s, qqok):
-    assert isinstance(qq( u'мир'),  str)    # qq(u) -> str (bytes·py2, unicode·py3)
+        _ = qq(s)
+        assert type(_) is bstr
-    # however what qq returns can be mixed with both unicode and bytes
+        assert _ == qqok
-    assert b'hello %s !' % qq(b('мир')) == b('hello "мир" !')   # b % qq(b)
-    assert b'hello %s !' % qq(u('мир')) == b('hello "мир" !')   # b % qq(u) -> b
+    _(      xbytes('мир'),  '"мир"')            # b''
-    assert u'hello %s !' % qq(u('мир')) == u('hello "мир" !')   # u % qq(u)
+    _(            u'мир',   '"мир"')            # u''
-    assert u'hello %s !' % qq(b('мир')) ==  u'hello "мир" !'    # u % qq(b) -> u
+    _(  xbytearray('мир'),  '"мир"')            # bytearray()
+    _(           b('мир'),  '"мир"')            # b()
+    _(           u('мир'),  '"мир"')            # u()
+    _(                  1,  '"1"')              # int
+    # what qq returns - bstr - can be mixed with both unicode, bytes and bytearray
+    # it is tested e.g. in test_strings_ops2 and test_strings_mod_and_format
 # ----------------------------------------

--- a/golang/strconv.py
+++ b/golang/strconv.py
@@ -22,29 +22,16 @@
 from __future__ import print_function, absolute_import
 import unicodedata, codecs
-from six import text_type as unicode        # py2: unicode      py3: str
 from six.moves import range as xrange
-from golang import b, u
+from golang import b
 from golang._golang import _py_utf8_decode_rune as _utf8_decode_rune, _py_rune_error as _rune_error, _xunichr
-# _bstr is like b but also returns whether input was unicode.
+# quote quotes unicode|bytes string into valid "..." bytestring always quoted with ".
-def _bstr(s):   # -> sbytes, wasunicode
+def quote(s):  # -> bstr
-    return b(s), isinstance(s, unicode)
+    q = _quote(b(s))
+    return b(q)
-# _ustr is like u but also returns whether input was bytes.
-def _ustr(s):   # -> sunicode, wasbytes
-    return u(s), isinstance(s, bytes)
-# quote quotes unicode|bytes string into valid "..." unicode|bytes string always quoted with ".
-def quote(s):
-    s, wasunicode = _bstr(s)
-    qs = _quote(s)
-    if wasunicode:
-        qs, _ = _ustr(qs)
-    return qs
 def _quote(s):
    assert isinstance(s, bytes)
@@ -103,7 +90,7 @@ def _quote(s):
 # unquote decodes "-quoted unicode|byte string.
 #
 # ValueError is raised if there are quoting syntax errors.
-def unquote(s):
+def unquote(s):  # -> bstr
    us, tail = unquote_next(s)
    if len(tail) != 0:
        raise ValueError('non-empty tail after closing "')
@@ -114,13 +101,9 @@ def unquote(s):
 # it returns -> (unquoted(s), tail-after-")
 #
 # ValueError is raised if there are quoting syntax errors.
-def unquote_next(s):
+def unquote_next(s):  # -> (bstr, bstr)
-    s, wasunicode = _bstr(s)
+    us, tail = _unquote_next(b(s))
-    us, tail = _unquote_next(s)
+    return b(us), b(tail)
-    if wasunicode:
-        us, _   = _ustr(us)
-        tail, _ = _ustr(tail)
-    return us, tail
 def _unquote_next(s):
    assert isinstance(s, bytes)

--- a/golang/strconv_test.py
+++ b/golang/strconv_test.py
@@ -20,10 +20,11 @@
 from __future__ import print_function, absolute_import
+from golang import bstr
 from golang.strconv import quote, unquote, unquote_next
 from golang.gcompat import qq
-from six import int2byte as bchr, PY3
+from six import int2byte as bchr
 from six.moves import range as xrange
 from pytest import raises
@@ -34,16 +35,9 @@ def byterange(start, stop):
    return b
-# asstr converts unicode|bytes to str type of current python.
+def assert_bstreq(x, y):
-def asstr(s):
+    assert type(x) is bstr
-    if PY3:
+    assert x == y
-        if isinstance(s, bytes):
-            s = s.decode('utf-8')
-    # PY2
-    else:
-        if isinstance(s, unicode):
-            s = s.encode('utf-8')
-    return s
 def test_quote():
    testv = (
@@ -72,6 +66,9 @@ def test_quote():
        (u'\ufffd',         u'�'),
    )
+    # quote/unquote* always give bstr
+    BEQ = assert_bstreq
    for tin, tquoted in testv:
        # quote(in) == quoted
        # in = unquote(quoted)
@@ -79,14 +76,13 @@ def test_quote():
        tail = b'123' if isinstance(tquoted, bytes) else '123'
        tquoted = q + tquoted + q   # add lead/trail "
-        assert quote(tin) == tquoted
+        BEQ(quote(tin), tquoted)
-        assert unquote(tquoted) == tin
+        BEQ(unquote(tquoted), tin)
-        assert unquote_next(tquoted) == (tin, type(tin)())
+        _, __ = unquote_next(tquoted);          BEQ(_, tin);  BEQ(__, "")
-        assert unquote_next(tquoted + tail) == (tin, tail)
+        _, __ = unquote_next(tquoted + tail);   BEQ(_, tin);  BEQ(__, tail)
        with raises(ValueError): unquote(tquoted + tail)
-        # qq always gives str
+        BEQ(qq(tin), tquoted)
-        assert qq(tin) == asstr(tquoted)
        # also check how it works on complementary unicode/bytes input type
        if isinstance(tin, bytes):
@@ -103,14 +99,13 @@ def test_quote():
            tquoted = tquoted.encode('utf-8')
            tail = tail.encode('utf-8')
-        assert quote(tin) == tquoted
+        BEQ(quote(tin), tquoted)
-        assert unquote(tquoted) == tin
+        BEQ(unquote(tquoted), tin)
-        assert unquote_next(tquoted) == (tin, type(tin)())
+        _, __ = unquote_next(tquoted);          BEQ(_, tin);  BEQ(__, "")
-        assert unquote_next(tquoted + tail) == (tin, tail)
+        _, __ = unquote_next(tquoted + tail);   BEQ(_, tin);  BEQ(__, tail)
        with raises(ValueError): unquote(tquoted + tail)
-        # qq always gives str
+        BEQ(qq(tin), tquoted)
-        assert qq(tin) == asstr(tquoted)
 # verify that non-canonical quotation can be unquoted too.