Commit a341f761 authored by Kirill Smelkov's avatar Kirill Smelkov

X golang_str: Fix bstr/ustr __eq__ and friends to return NotImplemented wrt non-string types

In 54c2a3cf (golang_str: Teach bstr/ustr to compare wrt any
string with automatic coercion) I've added __eq__, __ne__, __lt__ etc
methods to our strings, but __lt__ and other comparison to raise
TypeError against any non-string type. My idea was to mimic user-visible
py3 behaviour such as

    >>> "abc" > 1
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: '>' not supported between instances of 'str' and 'int'

However it turned out that the implementation was not exactly matching
what Python is doing internally which lead to incorrect behaviour when
bstr or ustr is compared wrt another type with its own __cmp__. In the
general case for `a op b` Python first queries a.__op__(b) and
b.__op'__(a) and sometimes other methods before going to .__cmp__. This
relies on the methods to return NotImplemented instead of raising an
exception and if a trial raises TypeError everything is stopped and that
TypeError is returned to the caller.

Jérome reports a real breakage due to this when bstr is compared wrt
distutils.version.LooseVersion . LooseVersion is basically

    class LooseVersion(Version):
        def __cmp__ (self, other):
            if isinstance(other, StringType):
                other = LooseVersion(other)

            return cmp(self.version, other.version)

but due to my thinko on `LooseVersion < bstr` the control flow was not
getting into that LooseVersion.__cmp__ because bstr.__gt__ was tried
first and raised TypeError.

-> Fix all comparison operations to return NotImplemented instead of
raising TypeError and make sure in the tests that this behaviour exactly
matches what native str type does.

The fix is needed not only for py2 because added test_strings_cmp_wrt_distutils_LooseVersion
was failing on py3 as well without the fix.

/reported-by @jerome
/reported-on nexedi/slapos!1575 (comment 206080)
parent cb0e6055
Pipeline #34422 failed with stage
in 0 seconds
......@@ -383,19 +383,48 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711
return zbytes.__hash__(self)
# == != < > <= >=
# NOTE == and != are special: they must succeed against any type so that
# bstr could be used as dict key.
# NOTE all operations must succeed against any type so that bstr could be
# used as dict key and arbitrary three-way comparisons, done by python,
# work correctly. This means that on py2 e.g. `bstr > int` will behave
# exactly as builtin str and won't raise TypeError. On py3 TypeError is
# raised for such operations by python itself when it receives
# NotImplemented from all tried methods.
def __eq__(a, b):
try:
b = _pyb_coerce(b)
except TypeError:
return False
return NotImplemented
return zbytes.__eq__(a, b)
def __ne__(a, b): return not a.__eq__(b)
def __lt__(a, b): return zbytes.__lt__(a, _pyb_coerce(b))
def __gt__(a, b): return zbytes.__gt__(a, _pyb_coerce(b))
def __le__(a, b): return zbytes.__le__(a, _pyb_coerce(b))
def __ge__(a, b): return zbytes.__ge__(a, _pyb_coerce(b))
def __ne__(a, b):
try:
b = _pyb_coerce(b)
except TypeError:
return NotImplemented
return zbytes.__ne__(a, b)
def __lt__(a, b):
try:
b = _pyb_coerce(b)
except TypeError:
return NotImplemented
return zbytes.__lt__(a, _pyb_coerce(b))
def __gt__(a, b):
try:
b = _pyb_coerce(b)
except TypeError:
return NotImplemented
return zbytes.__gt__(a, _pyb_coerce(b))
def __le__(a, b):
try:
b = _pyb_coerce(b)
except TypeError:
return NotImplemented
return zbytes.__le__(a, _pyb_coerce(b))
def __ge__(a, b):
try:
b = _pyb_coerce(b)
except TypeError:
return NotImplemented
return zbytes.__ge__(a, _pyb_coerce(b))
# len - no need to override
......@@ -724,19 +753,44 @@ cdef class _pyustr(unicode):
return hash(pyb(self))
# == != < > <= >=
# NOTE == and != are special: they must succeed against any type so that
# ustr could be used as dict key.
# NOTE all operations must succeed against any type.
# See bstr for details.
def __eq__(a, b):
try:
b = _pyu_coerce(b)
except TypeError:
return False
return NotImplemented
return zunicode.__eq__(a, b)
def __ne__(a, b): return not a.__eq__(b)
def __lt__(a, b): return zunicode.__lt__(a, _pyu_coerce(b))
def __gt__(a, b): return zunicode.__gt__(a, _pyu_coerce(b))
def __le__(a, b): return zunicode.__le__(a, _pyu_coerce(b))
def __ge__(a, b): return zunicode.__ge__(a, _pyu_coerce(b))
def __ne__(a, b):
try:
b = _pyu_coerce(b)
except TypeError:
return NotImplemented
return zunicode.__ne__(a, b)
def __lt__(a, b):
try:
b = _pyu_coerce(b)
except TypeError:
return NotImplemented
return zunicode.__lt__(a, _pyu_coerce(b))
def __gt__(a, b):
try:
b = _pyu_coerce(b)
except TypeError:
return NotImplemented
return zunicode.__gt__(a, _pyu_coerce(b))
def __le__(a, b):
try:
b = _pyu_coerce(b)
except TypeError:
return NotImplemented
return zunicode.__le__(a, _pyu_coerce(b))
def __ge__(a, b):
try:
b = _pyu_coerce(b)
except TypeError:
return NotImplemented
return zunicode.__ge__(a, _pyu_coerce(b))
# len - no need to override
......
......@@ -942,10 +942,18 @@ def test_strings_ops2_bufreject(tx, ty):
assert (x == y) is False # see test_strings_ops2_eq_any
assert (x != y) is True
with raises(TypeError): x >= y
with raises(TypeError): x <= y
with raises(TypeError): x > y
with raises(TypeError): x < y
if six.PY3:
with raises(TypeError): "abc" >= y # x.__op__(y) and y.__op'__(x) both return
with raises(TypeError): x >= y # NotImplemented which leads py3 to raise TypeError
with raises(TypeError): x <= y
with raises(TypeError): x > y
with raises(TypeError): x < y
else:
"abc" >= y # does not raise but undefined
x >= y # ----//----
x <= y
x > y
x < y
# reverse operations, e.g. memoryview + bstr
with raises(TypeError): y + x
......@@ -959,10 +967,18 @@ def test_strings_ops2_bufreject(tx, ty):
y == x # not raises TypeError - see test_strings_ops2_eq_any
y != x #
if tx is not bstr:
with raises(TypeError): y >= x
with raises(TypeError): y <= x
with raises(TypeError): y > x
with raises(TypeError): y < x
if six.PY3:
with raises(TypeError): y >= "abc" # see ^^^
with raises(TypeError): y >= x
with raises(TypeError): y <= x
with raises(TypeError): y > x
with raises(TypeError): y < x
else:
y >= "abc"
y >= x
y <= x
y > x
y < x
# verify string operations like `x == *` for x being bstr/ustr.
......@@ -982,10 +998,19 @@ def test_strings_ops2_eq_any(tx):
def assertNE(y):
assert (x == y) is False
assert (x != y) is True
with raises(TypeError): x >= y
with raises(TypeError): x <= y
with raises(TypeError): x > y
with raises(TypeError): x < y
if six.PY3:
with raises(TypeError): "abc" >= y # py3: NotImplemented -> raise
with raises(TypeError): x >= y
with raises(TypeError): x <= y
with raises(TypeError): x > y
with raises(TypeError): x < y
else:
"abc" >= y # py2: no raise on NotImplemented; result is undefined
x >= y
x <= y
x > y
x < y
_ = assertNE
_(None)
......@@ -1009,6 +1034,21 @@ def test_strings_ops2_eq_any(tx):
with raises(TypeError): hash(l)
_(l)
# also verify that internally x.__op__(y of non-string-type) returns
# NotImplemented - exactly the same way as builtin str type does. Even
# though `x op y` gives proper answer internally python counts on x.__op__(y)
# to return NotImplemented so that arbitrary three-way comparison works properly.
s = xstr(u'мир', str)
for op in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
sop = getattr(s, '__%s__' % op)
xop = getattr(x, '__%s__' % op)
assert sop(None) is NotImplemented
assert xop(None) is NotImplemented
assert sop(0) is NotImplemented
assert xop(0) is NotImplemented
assert sop(hx) is NotImplemented
assert xop(hx) is NotImplemented
# verify logic in `bstr % ...` and `bstr.format(...)` .
def test_strings_mod_and_format():
......@@ -2624,6 +2664,44 @@ def tests_strings_early_str_subclass():
# XXX more...
# ---- issues hit by users ----
# fixes for below issues have their corresponding tests in the main part above, but
# we also add tests with original code where problems were hit.
# three-way comparison wrt class with __cmp__ was working incorrectly because
# bstr.__op__ were not returning NotImplemented wrt non-string types.
# https://lab.nexedi.com/nexedi/slapos/-/merge_requests/1575#note_206080
@mark.parametrize('tx', (str, bstr if str is bytes else ustr)) # LooseVersion does not handle unicode on py2
def test_strings_cmp_wrt_distutils_LooseVersion(tx):
from distutils.version import LooseVersion
l = LooseVersion('1.16.2')
x = xstr('1.12', tx)
assert not (x == l)
assert not (l == x)
assert x != l
assert l != x
assert not (x >= l)
assert l >= x
assert x <= l
assert not (l <= x)
assert x < l
assert not (l < x)
x = xstr('1.16.2', tx)
assert x == l
assert l == x
assert not (x != l)
assert not (l != x)
assert x >= l
assert l >= x
assert x <= l
assert l <= x
assert not (x < l)
assert not (l < x)
# ---- benchmarks ----
# utf-8 decoding
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment