Commit 604a7765 authored by Kirill Smelkov

strconv, golang_str: Switch quote, unquote and qq to always return bstr

bstr is becoming the default pygolang string type, and it can be freely
mixed with bytes, unicode and ustr. Previously, e.g., strconv.quote
checked the type of its input and tried to return a result of the same
type. This is now unnecessary, since bstr is intended to be used
universally and to interoperate with all other string types.
parent bbbb58f0
...@@ -549,19 +549,8 @@ def pyqq(obj): ...@@ -549,19 +549,8 @@ def pyqq(obj):
# py2: unicode | str # py2: unicode | str
# py3: str | bytes # py3: str | bytes
if not isinstance(obj, (unicode, bytes)): if not isinstance(obj, (unicode, bytes)):
obj = str(obj) obj = _bstringify(obj)
return pystrconv.quote(obj)
qobj = pystrconv.quote(obj)
# `printf('%s', qq(obj))` should work. For this make sure qobj is always
# a-la str type (unicode on py3, bytes on py2), that can be transparently
# converted to unicode or bytes as needed.
if PY_MAJOR_VERSION >= 3:
qobj = pyu(qobj)
else:
qobj = pyb(qobj)
return qobj
......
...@@ -672,15 +672,22 @@ def test_strings_subclasses(tx): ...@@ -672,15 +672,22 @@ def test_strings_subclasses(tx):
def test_qq(): def test_qq():
# NOTE qq is also tested as part of strconv.quote # NOTE qq is also tested as part of strconv.quote
# qq(any) returns string type # qq(any) -> bstr
assert isinstance(qq(b('мир')), str) # qq(b) -> str (bytes·py2, unicode·py3) def _(s, qqok):
assert isinstance(qq( u'мир'), str) # qq(u) -> str (bytes·py2, unicode·py3) _ = qq(s)
assert type(_) is bstr
# however what qq returns can be mixed with both unicode and bytes assert _ == qqok
assert b'hello %s !' % qq(b('мир')) == b('hello "мир" !') # b % qq(b)
assert b'hello %s !' % qq(u('мир')) == b('hello "мир" !') # b % qq(u) -> b _( xbytes('мир'), '"мир"') # b''
assert u'hello %s !' % qq(u('мир')) == u('hello "мир" !') # u % qq(u) _( u'мир', '"мир"') # u''
assert u'hello %s !' % qq(b('мир')) == u'hello "мир" !' # u % qq(b) -> u _( xbytearray('мир'), '"мир"') # bytearray()
_( b('мир'), '"мир"') # b()
_( u('мир'), '"мир"') # u()
_( 1, '"1"') # int
# what qq returns - bstr - can be mixed with both unicode, bytes and bytearray
# it is tested e.g. in test_strings_ops2 and test_strings_mod_and_format
# ---------------------------------------- # ----------------------------------------
......
...@@ -22,29 +22,16 @@ ...@@ -22,29 +22,16 @@
from __future__ import print_function, absolute_import from __future__ import print_function, absolute_import
import unicodedata, codecs import unicodedata, codecs
from six import text_type as unicode # py2: unicode py3: str
from six.moves import range as xrange from six.moves import range as xrange
from golang import b, u from golang import b
from golang._golang import _py_utf8_decode_rune as _utf8_decode_rune, _py_rune_error as _rune_error, _xunichr from golang._golang import _py_utf8_decode_rune as _utf8_decode_rune, _py_rune_error as _rune_error, _xunichr
# _bstr is like b but also returns whether input was unicode. # quote quotes unicode|bytes string into valid "..." bytestring always quoted with ".
def _bstr(s): # -> sbytes, wasunicode def quote(s): # -> bstr
return b(s), isinstance(s, unicode) q = _quote(b(s))
return b(q)
# _ustr is like u but also returns whether input was bytes.
def _ustr(s): # -> sunicode, wasbytes
return u(s), isinstance(s, bytes)
# quote quotes unicode|bytes string into valid "..." unicode|bytes string always quoted with ".
def quote(s):
s, wasunicode = _bstr(s)
qs = _quote(s)
if wasunicode:
qs, _ = _ustr(qs)
return qs
def _quote(s): def _quote(s):
assert isinstance(s, bytes) assert isinstance(s, bytes)
...@@ -103,7 +90,7 @@ def _quote(s): ...@@ -103,7 +90,7 @@ def _quote(s):
# unquote decodes "-quoted unicode|byte string. # unquote decodes "-quoted unicode|byte string.
# #
# ValueError is raised if there are quoting syntax errors. # ValueError is raised if there are quoting syntax errors.
def unquote(s): def unquote(s): # -> bstr
us, tail = unquote_next(s) us, tail = unquote_next(s)
if len(tail) != 0: if len(tail) != 0:
raise ValueError('non-empty tail after closing "') raise ValueError('non-empty tail after closing "')
...@@ -114,13 +101,9 @@ def unquote(s): ...@@ -114,13 +101,9 @@ def unquote(s):
# it returns -> (unquoted(s), tail-after-") # it returns -> (unquoted(s), tail-after-")
# #
# ValueError is raised if there are quoting syntax errors. # ValueError is raised if there are quoting syntax errors.
def unquote_next(s): def unquote_next(s): # -> (bstr, bstr)
s, wasunicode = _bstr(s) us, tail = _unquote_next(b(s))
us, tail = _unquote_next(s) return b(us), b(tail)
if wasunicode:
us, _ = _ustr(us)
tail, _ = _ustr(tail)
return us, tail
def _unquote_next(s): def _unquote_next(s):
assert isinstance(s, bytes) assert isinstance(s, bytes)
......
...@@ -20,10 +20,11 @@ ...@@ -20,10 +20,11 @@
from __future__ import print_function, absolute_import from __future__ import print_function, absolute_import
from golang import bstr
from golang.strconv import quote, unquote, unquote_next from golang.strconv import quote, unquote, unquote_next
from golang.gcompat import qq from golang.gcompat import qq
from six import int2byte as bchr, PY3 from six import int2byte as bchr
from six.moves import range as xrange from six.moves import range as xrange
from pytest import raises from pytest import raises
...@@ -34,16 +35,9 @@ def byterange(start, stop): ...@@ -34,16 +35,9 @@ def byterange(start, stop):
return b return b
# asstr converts unicode|bytes to str type of current python. def assert_bstreq(x, y):
def asstr(s): assert type(x) is bstr
if PY3: assert x == y
if isinstance(s, bytes):
s = s.decode('utf-8')
# PY2
else:
if isinstance(s, unicode):
s = s.encode('utf-8')
return s
def test_quote(): def test_quote():
testv = ( testv = (
...@@ -72,6 +66,9 @@ def test_quote(): ...@@ -72,6 +66,9 @@ def test_quote():
(u'\ufffd', u'�'), (u'\ufffd', u'�'),
) )
# quote/unquote* always give bstr
BEQ = assert_bstreq
for tin, tquoted in testv: for tin, tquoted in testv:
# quote(in) == quoted # quote(in) == quoted
# in = unquote(quoted) # in = unquote(quoted)
...@@ -79,14 +76,13 @@ def test_quote(): ...@@ -79,14 +76,13 @@ def test_quote():
tail = b'123' if isinstance(tquoted, bytes) else '123' tail = b'123' if isinstance(tquoted, bytes) else '123'
tquoted = q + tquoted + q # add lead/trail " tquoted = q + tquoted + q # add lead/trail "
assert quote(tin) == tquoted BEQ(quote(tin), tquoted)
assert unquote(tquoted) == tin BEQ(unquote(tquoted), tin)
assert unquote_next(tquoted) == (tin, type(tin)()) _, __ = unquote_next(tquoted); BEQ(_, tin); BEQ(__, "")
assert unquote_next(tquoted + tail) == (tin, tail) _, __ = unquote_next(tquoted + tail); BEQ(_, tin); BEQ(__, tail)
with raises(ValueError): unquote(tquoted + tail) with raises(ValueError): unquote(tquoted + tail)
# qq always gives str BEQ(qq(tin), tquoted)
assert qq(tin) == asstr(tquoted)
# also check how it works on complementary unicode/bytes input type # also check how it works on complementary unicode/bytes input type
if isinstance(tin, bytes): if isinstance(tin, bytes):
...@@ -103,14 +99,13 @@ def test_quote(): ...@@ -103,14 +99,13 @@ def test_quote():
tquoted = tquoted.encode('utf-8') tquoted = tquoted.encode('utf-8')
tail = tail.encode('utf-8') tail = tail.encode('utf-8')
assert quote(tin) == tquoted BEQ(quote(tin), tquoted)
assert unquote(tquoted) == tin BEQ(unquote(tquoted), tin)
assert unquote_next(tquoted) == (tin, type(tin)()) _, __ = unquote_next(tquoted); BEQ(_, tin); BEQ(__, "")
assert unquote_next(tquoted + tail) == (tin, tail) _, __ = unquote_next(tquoted + tail); BEQ(_, tin); BEQ(__, tail)
with raises(ValueError): unquote(tquoted + tail) with raises(ValueError): unquote(tquoted + tail)
# qq always gives str BEQ(qq(tin), tquoted)
assert qq(tin) == asstr(tquoted)
# verify that non-canonical quotation can be unquoted too. # verify that non-canonical quotation can be unquoted too.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment