Commit 604a7765 authored by Kirill Smelkov

strconv, golang_str: Switch quote, unquote and qq to always return bstr

bstr is becoming the default pygolang string type, and it interoperates
with bytes, unicode and ustr. Previously, e.g. strconv.quote checked the
type of its input and tried to return a result of the same type. This is
no longer necessary, since bstr is intended to be used universally and to
be interoperable with all other string types.
parent bbbb58f0
......@@ -549,19 +549,8 @@ def pyqq(obj):
# py2: unicode | str
# py3: str | bytes
if not isinstance(obj, (unicode, bytes)):
obj = str(obj)
qobj = pystrconv.quote(obj)
# `printf('%s', qq(obj))` should work. For this make sure qobj is always
# a-la str type (unicode on py3, bytes on py2), that can be transparently
# converted to unicode or bytes as needed.
if PY_MAJOR_VERSION >= 3:
qobj = pyu(qobj)
else:
qobj = pyb(qobj)
return qobj
obj = _bstringify(obj)
return pystrconv.quote(obj)
......
......@@ -672,15 +672,22 @@ def test_strings_subclasses(tx):
def test_qq():
# NOTE qq is also tested as part of strconv.quote
# qq(any) returns string type
assert isinstance(qq(b('мир')), str) # qq(b) -> str (bytes·py2, unicode·py3)
assert isinstance(qq( u'мир'), str) # qq(u) -> str (bytes·py2, unicode·py3)
# however what qq returns can be mixed with both unicode and bytes
assert b'hello %s !' % qq(b('мир')) == b('hello "мир" !') # b % qq(b)
assert b'hello %s !' % qq(u('мир')) == b('hello "мир" !') # b % qq(u) -> b
assert u'hello %s !' % qq(u('мир')) == u('hello "мир" !') # u % qq(u)
assert u'hello %s !' % qq(b('мир')) == u'hello "мир" !' # u % qq(b) -> u
# qq(any) -> bstr
def _(s, qqok):
_ = qq(s)
assert type(_) is bstr
assert _ == qqok
_( xbytes('мир'), '"мир"') # b''
_( u'мир', '"мир"') # u''
_( xbytearray('мир'), '"мир"') # bytearray()
_( b('мир'), '"мир"') # b()
_( u('мир'), '"мир"') # u()
_( 1, '"1"') # int
# what qq returns - bstr - can be mixed with both unicode, bytes and bytearray
# it is tested e.g. in test_strings_ops2 and test_strings_mod_and_format
# ----------------------------------------
......
......@@ -22,29 +22,16 @@
from __future__ import print_function, absolute_import
import unicodedata, codecs
from six import text_type as unicode # py2: unicode py3: str
from six.moves import range as xrange
from golang import b, u
from golang import b
from golang._golang import _py_utf8_decode_rune as _utf8_decode_rune, _py_rune_error as _rune_error, _xunichr
# _bstr converts s with b and additionally reports whether the input was
# of unicode type.
def _bstr(s): # -> sbytes, wasunicode
    wasunicode = isinstance(s, unicode)
    sbytes = b(s)
    return sbytes, wasunicode
# _ustr converts s with u and additionally reports whether the input was
# of bytes type.
def _ustr(s): # -> sunicode, wasbytes
    wasbytes = isinstance(s, bytes)
    sunicode = u(s)
    return sunicode, wasbytes
# quote quotes unicode|bytes string into valid "..." unicode|bytes string always quoted with ".
#
# The result has the same kind as the input: unicode input gives the
# quoted string converted back via _ustr; anything else gives what _quote
# returned.
def quote(s):
    sbytes, wasunicode = _bstr(s)
    quoted = _quote(sbytes)
    if not wasunicode:
        return quoted
    # input was unicode -> hand the quoted result back as unicode too
    return _ustr(quoted)[0]
# quote quotes unicode|bytes string into valid "..." bytestring always quoted with ".
def quote(s): # -> bstr
    """Return s quoted with ", converted with b on the way in and on the way out."""
    # _quote works on bytes only, so the input is converted with b first;
    # its result is then wrapped with b again.
    return b(_quote(b(s)))
def _quote(s):
assert isinstance(s, bytes)
......@@ -103,7 +90,7 @@ def _quote(s):
# unquote decodes "-quoted unicode|byte string.
#
# ValueError is raised if there are quoting syntax errors.
def unquote(s):
def unquote(s): # -> bstr
us, tail = unquote_next(s)
if len(tail) != 0:
raise ValueError('non-empty tail after closing "')
......@@ -114,13 +101,9 @@ def unquote(s):
# it returns -> (unquoted(s), tail-after-")
#
# ValueError is raised if there are quoting syntax errors.
def unquote_next(s):
    """Unquote the leading "-quoted string of s -> (unquoted, tail).

    Both results are passed through _ustr when s was unicode; otherwise they
    are returned exactly as produced by _unquote_next.
    """
    sbytes, wasunicode = _bstr(s)
    unquoted, rest = _unquote_next(sbytes)
    if not wasunicode:
        return unquoted, rest
    # input was unicode -> convert both parts back, unquoted part first
    unquoted = _ustr(unquoted)[0]
    rest = _ustr(rest)[0]
    return unquoted, rest
def unquote_next(s): # -> (bstr, bstr)
    """Unquote the leading "-quoted string of s -> (unquoted, tail).

    The input is converted with b before being handed to _unquote_next, and
    both resulting parts are converted with b again before being returned.
    """
    unquoted, rest = _unquote_next(b(s))
    return (b(unquoted), b(rest))
def _unquote_next(s):
assert isinstance(s, bytes)
......
......@@ -20,10 +20,11 @@
from __future__ import print_function, absolute_import
from golang import bstr
from golang.strconv import quote, unquote, unquote_next
from golang.gcompat import qq
from six import int2byte as bchr, PY3
from six import int2byte as bchr
from six.moves import range as xrange
from pytest import raises
......@@ -34,16 +35,9 @@ def byterange(start, stop):
return b
# asstr converts unicode|bytes to str type of current python.
def asstr(s):
    """Return s as native str: bytes are utf-8 decoded on py3, unicode is utf-8 encoded on py2.

    Any other input is returned unchanged.
    """
    if PY3 and isinstance(s, bytes):
        return s.decode('utf-8')
    # `unicode` is only consulted on py2, where the builtin exists
    if not PY3 and isinstance(s, unicode):
        return s.encode('utf-8')
    return s
def assert_bstreq(x, y):
    """Assert that x is exactly of type bstr (not a subclass) and that it equals y."""
    xtype = type(x)
    assert xtype is bstr
    assert x == y
def test_quote():
testv = (
......@@ -72,6 +66,9 @@ def test_quote():
(u'\ufffd', u'�'),
)
# quote/unquote* always give bstr
BEQ = assert_bstreq
for tin, tquoted in testv:
# quote(in) == quoted
# in = unquote(quoted)
......@@ -79,14 +76,13 @@ def test_quote():
tail = b'123' if isinstance(tquoted, bytes) else '123'
tquoted = q + tquoted + q # add lead/trail "
assert quote(tin) == tquoted
assert unquote(tquoted) == tin
assert unquote_next(tquoted) == (tin, type(tin)())
assert unquote_next(tquoted + tail) == (tin, tail)
BEQ(quote(tin), tquoted)
BEQ(unquote(tquoted), tin)
_, __ = unquote_next(tquoted); BEQ(_, tin); BEQ(__, "")
_, __ = unquote_next(tquoted + tail); BEQ(_, tin); BEQ(__, tail)
with raises(ValueError): unquote(tquoted + tail)
# qq always gives str
assert qq(tin) == asstr(tquoted)
BEQ(qq(tin), tquoted)
# also check how it works on complementary unicode/bytes input type
if isinstance(tin, bytes):
......@@ -103,14 +99,13 @@ def test_quote():
tquoted = tquoted.encode('utf-8')
tail = tail.encode('utf-8')
assert quote(tin) == tquoted
assert unquote(tquoted) == tin
assert unquote_next(tquoted) == (tin, type(tin)())
assert unquote_next(tquoted + tail) == (tin, tail)
BEQ(quote(tin), tquoted)
BEQ(unquote(tquoted), tin)
_, __ = unquote_next(tquoted); BEQ(_, tin); BEQ(__, "")
_, __ = unquote_next(tquoted + tail); BEQ(_, tin); BEQ(__, tail)
with raises(ValueError): unquote(tquoted + tail)
# qq always gives str
assert qq(tin) == asstr(tquoted)
BEQ(qq(tin), tquoted)
# verify that non-canonical quotation can be unquoted too.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment