golang_str: bstr/ustr string methods

Take all str/unicode methods, such as .capitalize(), .split(), .join(), etc., and implement them for bstr/ustr. For example, bstr.split() behaves like unicode.split(), but returns a list of bstr instead of a list of unicode, and similarly for all other methods. Testing is organized by verifying the behaviour of every method on unicode, bstr and ustr: if the results match modulo deep replacement of unicode with bstr/ustr, everything is ok.

golang_str: bstr/ustr string methods
Take all str/unicode methods, such as .capitalize(), .split(), .join(), etc., and implement them for bstr/ustr. For example, bstr.split() behaves like unicode.split(), but returns a list of bstr instead of a list of unicode, and similarly for all other methods. Testing is organized by verifying the behaviour of every method on unicode, bstr and ustr: if the results match modulo deep replacement of unicode with bstr/ustr, everything is ok.
ff24be3d · Kirill Smelkov · 2c20c055 · ff24be3d · ff24be3d
Commit ff24be3d authored Oct 07, 2022 by Kirill Smelkov
Hide whitespace changes
Inline Side-by-side

Showing with 698 additions and 0 deletions

golang/_golang_str.pyx golang/_golang_str.pyx +259 -0

golang/golang_str_test.py golang/golang_str_test.py +439 -0

No files found.
--- a/golang/_golang_str.pyx
+++ b/golang/_golang_str.pyx
@@ -29,6 +29,7 @@ from cpython cimport Py_EQ, Py_NE, Py_LT, Py_GT, Py_LE, Py_GE
 from cpython.iterobject cimport PySeqIter_New
 from cpython cimport PyObject_CheckBuffer
 cdef extern from "Python.h":
+    Py_ssize_t PY_SSIZE_T_MAX
    void PyType_Modified(PyTypeObject *)

 cdef extern from "Python.h":
@@ -329,6 +330,13 @@ class pybstr(bytes):
        return pyu(self).__iter__()


+    # __contains__
+    def __contains__(self, key):
+        # NOTE on py3 bytes.__contains__ accepts numbers and buffers. We don't want to
+        # automatically coerce any of them to bytestrings
+        return bytes.__contains__(self, _pyb_coerce(key))
+
+
    # __add__, __radd__     (no need to override __iadd__)
    def __add__(a, b):
        # NOTE Cython < 3 does not automatically support __radd__ for cdef class
@@ -355,6 +363,101 @@ class pybstr(bytes):
        return b.__mul__(a)


+    # all other string methods
+
+    def capitalize(self):                       return pyb(pyu(self).capitalize())
+    if _strhas('casefold'): # py3.3  TODO provide py2 implementation
+        def casefold(self):                     return pyb(pyu(self).casefold())
+    def center(self, width, fillchar=' '):      return pyb(pyu(self).center(width, fillchar))
+
+    def count(self, sub, start=None, end=None): return bytes.count(self, _pyb_coerce(sub), start, end)
+
+    def endswith(self, suffix, start=None, end=None):
+        # Report whether self ends with suffix; start/end are forwarded to
+        # bytes.endswith. suffix may also be a tuple of candidates, in which
+        # case each candidate is coerced via _pyb_coerce and tried in order
+        # until one matches.
+        if not isinstance(suffix, tuple):
+            lo = 0              if start is None else start
+            hi = PY_SSIZE_T_MAX if end   is None else end
+            return bytes.endswith(self, _pyb_coerce(suffix), lo, hi)
+        return any(self.endswith(_pyb_coerce(cand), start, end) for cand in suffix)
+
+    def expandtabs(self, tabsize=8):            return pyb(pyu(self).expandtabs(tabsize))
+
+    # NOTE find/index & friends should return byte-position, not unicode-position
+    def find(self, sub, start=None, end=None):  return bytes.find(self, _pyb_coerce(sub), start, end)
+    def index(self, sub, start=None, end=None): return bytes.index(self, _pyb_coerce(sub), start, end)
+
+    def isalnum(self):      return pyu(self).isalnum()
+    def isalpha(self):      return pyu(self).isalpha()
+    # isascii(self)         no need to override
+    def isdecimal(self):    return pyu(self).isdecimal()
+    def isdigit(self):      return pyu(self).isdigit()
+    if _strhas('isidentifier'): # py3  TODO provide fallback implementation
+        def isidentifier(self): return pyu(self).isidentifier()
+    def islower(self):      return pyu(self).islower()
+    def isnumeric(self):    return pyu(self).isnumeric()
+    if _strhas('isprintable'):  # py3  TODO provide fallback implementation
+        def isprintable(self):  return pyu(self).isprintable()
+    def isspace(self):      return pyu(self).isspace()
+    def istitle(self):      return pyu(self).istitle()
+
+    def join(self, iterable):               return pyb(bytes.join(self, (_pyb_coerce(_) for _ in iterable)))
+    def ljust(self, width, fillchar=' '):   return pyb(pyu(self).ljust(width, fillchar))
+    def lower(self):                        return pyb(pyu(self).lower())
+    def lstrip(self, chars=None):           return pyb(pyu(self).lstrip(chars))
+    def partition(self, sep):               return tuple(pyb(_) for _ in bytes.partition(self, _pyb_coerce(sep)))
+    if _strhas('removeprefix'): # py3.9  TODO provide fallback implementation
+        def removeprefix(self, prefix):     return pyb(pyu(self).removeprefix(prefix))
+    if _strhas('removesuffix'): # py3.9  TODO provide fallback implementation
+        def removesuffix(self, suffix):     return pyb(pyu(self).removesuffix(suffix))
+    def replace(self, old, new, count=-1):  return pyb(bytes.replace(self, _pyb_coerce(old), _pyb_coerce(new), count))
+
+    # NOTE rfind/rindex & friends should return byte-position, not unicode-position
+    def rfind(self, sub, start=None, end=None):   return bytes.rfind(self, _pyb_coerce(sub), start, end)
+    def rindex(self, sub, start=None, end=None):  return bytes.rindex(self, _pyb_coerce(sub), start, end)
+
+    def rjust(self, width, fillchar=' '):   return pyb(pyu(self).rjust(width, fillchar))
+    def rpartition(self, sep):              return tuple(pyb(_) for _ in bytes.rpartition(self, _pyb_coerce(sep)))
+    def rsplit(self, sep=None, maxsplit=-1):
+        v = pyu(self).rsplit(sep, maxsplit)
+        return list([pyb(_) for _ in v])
+    def rstrip(self, chars=None):           return pyb(pyu(self).rstrip(chars))
+    def split(self, sep=None, maxsplit=-1):
+        v = pyu(self).split(sep, maxsplit)
+        return list([pyb(_) for _ in v])
+    def splitlines(self, keepends=False):   return list(pyb(_) for _ in pyu(self).splitlines(keepends))
+
+    def startswith(self, prefix, start=None, end=None):
+        # Report whether self starts with prefix; start/end are forwarded to
+        # bytes.startswith. prefix may also be a tuple of candidates, in which
+        # case each candidate is coerced via _pyb_coerce and tried in order
+        # until one matches.
+        if not isinstance(prefix, tuple):
+            lo = 0              if start is None else start
+            hi = PY_SSIZE_T_MAX if end   is None else end
+            return bytes.startswith(self, _pyb_coerce(prefix), lo, hi)
+        return any(self.startswith(_pyb_coerce(cand), start, end) for cand in prefix)
+
+    def strip(self, chars=None):            return pyb(pyu(self).strip(chars))
+    def swapcase(self):                     return pyb(pyu(self).swapcase())
+    def title(self):                        return pyb(pyu(self).title())
+    def translate(self, table, delete=None):
+        # translate works in one of two modes, selected by the arguments:
+        #
+        # - bytes mode:   table is None or bytes, or delete is given. The call
+        #                 is forwarded to bytes.translate, with a missing delete
+        #                 replaced by b''.
+        # - unicode mode: anything else. self is converted via pyu and the
+        #                 translation is done by that object's .translate(table).
+        #
+        # In both modes the result is converted back via pyb.
+        #
+        # bytes mode  (compatibility with str/py2)
+        if table is None  or isinstance(table, bytes)  or  delete is not None:
+            if delete is None:  delete = b''
+            return pyb(bytes.translate(self, table, delete))
+        # unicode mode
+        else:
+            return pyb(pyu(self).translate(table))
+
+    def upper(self):                        return pyb(pyu(self).upper())
+    def zfill(self, width):                 return pyb(pyu(self).zfill(width))
+
+    @staticmethod
+    def maketrans(x=None, y=None, z=None):
+        return pyustr.maketrans(x, y, z)
+
+
 # XXX cannot `cdef class` with __new__: https://github.com/cython/cython/issues/799
 class pyustr(unicode):
    """ustr is unicode-string.
@@ -465,6 +568,11 @@ class pyustr(unicode):
            return PySeqIter_New(self)


+    # __contains__
+    def __contains__(self, key):
+        return unicode.__contains__(self, _pyu_coerce(key))
+
+
    # __add__, __radd__     (no need to override __iadd__)
    def __add__(a, b):
        # NOTE Cython < 3 does not automatically support __radd__ for cdef class
@@ -493,6 +601,150 @@ class pyustr(unicode):
        return b.__mul__(a)


+    # all other string methods
+
+    def capitalize(self):   return pyu(unicode.capitalize(self))
+    if _strhas('casefold'): # py3.3  TODO provide fallback implementation
+        def casefold(self): return pyu(unicode.casefold(self))
+    def center(self, width, fillchar=' '):      return pyu(unicode.center(self, width, _pyu_coerce(fillchar)))
+    def count(self, sub, start=None, end=None):
+        # cython optimizes unicode.count to directly call PyUnicode_Count -
+        # - cannot use None for start/stop  https://github.com/cython/cython/issues/4737
+        if start is None: start = 0
+        if end   is None: end   = PY_SSIZE_T_MAX
+        return unicode.count(self, _pyu_coerce(sub), start, end)
+    def endswith(self, suffix, start=None, end=None):
+        if isinstance(suffix, tuple):
+            for _ in suffix:
+                if self.endswith(_pyu_coerce(_), start, end):
+                    return True
+            return False
+        if start is None: start = 0
+        if end   is None: end   = PY_SSIZE_T_MAX
+        return unicode.endswith(self, _pyu_coerce(suffix), start, end)
+    def expandtabs(self, tabsize=8):            return pyu(unicode.expandtabs(self, tabsize))
+    def find(self, sub, start=None, end=None):
+        if start is None: start = 0
+        if end   is None: end   = PY_SSIZE_T_MAX
+        return unicode.find(self, _pyu_coerce(sub), start, end)
+    def index(self, sub, start=None, end=None):
+        if start is None: start = 0
+        if end   is None: end   = PY_SSIZE_T_MAX
+        return unicode.index(self, _pyu_coerce(sub), start, end)
+
+    # isalnum(self)         no need to override
+    # isalpha(self)         no need to override
+    # isascii(self)         no need to override
+    # isdecimal(self)       no need to override
+    # isdigit(self)         no need to override
+    # isidentifier(self)    no need to override
+    # islower(self)         no need to override
+    # isnumeric(self)       no need to override
+    # isprintable(self)     no need to override
+    # isspace(self)         no need to override
+    # istitle(self)         no need to override
+
+    def join(self, iterable):               return pyu(unicode.join(self, (_pyu_coerce(_) for _ in iterable)))
+    def ljust(self, width, fillchar=' '):   return pyu(unicode.ljust(self, width, _pyu_coerce(fillchar)))
+    def lower(self):                        return pyu(unicode.lower(self))
+    def lstrip(self, chars=None):           return pyu(unicode.lstrip(self, _xpyu_coerce(chars)))
+    def partition(self, sep):               return tuple(pyu(_) for _ in unicode.partition(self, _pyu_coerce(sep)))
+    if _strhas('removeprefix'): # py3.9  TODO provide fallback implementation
+        def removeprefix(self, prefix):     return pyu(unicode.removeprefix(self, _pyu_coerce(prefix)))
+    if _strhas('removesuffix'): # py3.9  TODO provide fallback implementation
+        def removesuffix(self, suffix):     return pyu(unicode.removesuffix(self, _pyu_coerce(suffix)))
+    def replace(self, old, new, count=-1):  return pyu(unicode.replace(self, _pyu_coerce(old), _pyu_coerce(new), count))
+    def rfind(self, sub, start=None, end=None):
+        if start is None: start = 0
+        if end   is None: end   = PY_SSIZE_T_MAX
+        return unicode.rfind(self, _pyu_coerce(sub), start, end)
+    def rindex(self, sub, start=None, end=None):
+        if start is None: start = 0
+        if end   is None: end   = PY_SSIZE_T_MAX
+        return unicode.rindex(self, _pyu_coerce(sub), start, end)
+    def rjust(self, width, fillchar=' '):   return pyu(unicode.rjust(self, width, _pyu_coerce(fillchar)))
+    def rpartition(self, sep):              return tuple(pyu(_) for _ in unicode.rpartition(self, _pyu_coerce(sep)))
+    def rsplit(self, sep=None, maxsplit=-1):
+        v = unicode.rsplit(self, _xpyu_coerce(sep), maxsplit)
+        return list([pyu(_) for _ in v])
+    def rstrip(self, chars=None):           return pyu(unicode.rstrip(self, _xpyu_coerce(chars)))
+    def split(self, sep=None, maxsplit=-1):
+        # cython optimizes unicode.split to directly call PyUnicode_Split - cannot use None for sep
+        # and cannot also use object=NULL  https://github.com/cython/cython/issues/4737
+        if sep is None:
+            if PY_MAJOR_VERSION >= 3:
+                v = unicode.split(self, maxsplit=maxsplit)
+            else:
+                # on py2 unicode.split does not accept keyword arguments
+                v = _udata(self).split(None, maxsplit)
+        else:
+            v = unicode.split(self, _pyu_coerce(sep), maxsplit)
+        return list([pyu(_) for _ in v])
+    def splitlines(self, keepends=False):   return list(pyu(_) for _ in unicode.splitlines(self, keepends))
+    def startswith(self, prefix, start=None, end=None):
+        if isinstance(prefix, tuple):
+            for _ in prefix:
+                if self.startswith(_pyu_coerce(_), start, end):
+                    return True
+            return False
+        if start is None: start = 0
+        if end   is None: end   = PY_SSIZE_T_MAX
+        return unicode.startswith(self, _pyu_coerce(prefix), start, end)
+    def strip(self, chars=None):            return pyu(unicode.strip(self, _xpyu_coerce(chars)))
+    def swapcase(self):                     return pyu(unicode.swapcase(self))
+    def title(self):                        return pyu(unicode.title(self))
+
+    def translate(self, table):
+        # Translate self through table and return the result converted via pyu.
+        #
+        # table must provide .items(). It is not passed to unicode.translate
+        # directly: a plain dict copy is built first, in which every non-int
+        # value went through _xpyu_coerce (None is kept as None).
+        #
+        # unicode.translate does not accept bstr values
+        t = {}
+        for k,v in table.items():
+            if not isinstance(v, int):  # either unicode ordinal,
+                v = _xpyu_coerce(v)     # character or None
+            t[k] = v
+        return pyu(unicode.translate(self, t))
+
+    def upper(self):                        return pyu(unicode.upper(self))
+    def zfill(self, width):                 return pyu(unicode.zfill(self, width))
+
+    @staticmethod
+    def maketrans(x=None, y=None, z=None):
+        # Build a translation table for .translate(). Arguments are converted
+        # via pyu before use, so they do not have to be unicode already:
+        #
+        #   maketrans(x)        x is a dict; non-integer keys are converted via pyu
+        #   maketrans(x, y)     character x[i] is mapped to character y[i]
+        #   maketrans(x, y, z)  ----//---- and every character of z is mapped to None
+        #
+        # On py3 the work is delegated to unicode.maketrans; on py2 the table is
+        # built by hand as a dict keyed by character ordinals.
+        if PY_MAJOR_VERSION >= 3:
+            if y is None:
+                # std maketrans(x) accepts only int|unicode keys
+                _ = {}
+                for k,v in x.items():
+                    if not isinstance(k, int):
+                        k = pyu(k)
+                    _[k] = v
+                return unicode.maketrans(_)
+            elif z is None:
+                return unicode.maketrans(pyu(x), pyu(y))  # std maketrans does not accept b
+            else:
+                return unicode.maketrans(pyu(x), pyu(y), pyu(z))  # ----//----
+
+        # hand-made on py2
+        t = {}
+        if y is not None:
+            x = pyu(x)
+            y = pyu(y)
+            if len(x) != len(y):
+                raise ValueError("len(x) must be == len(y)")
+            for (xi,yi) in zip(x,y):
+                t[ord(xi)] = ord(yi)
+            if z is not None:
+                z = pyu(z)
+                for _ in z:
+                    t[ord(_)] = None
+        else:
+            if type(x) is not dict:
+                raise TypeError("sole x must be dict")
+            for k,v in x.iteritems():
+                if not isinstance(k, (int,long)):
+                    k = ord(pyu(k))
+                # NOTE(review): every value goes through pyu here, while the py3
+                # branch above passes values through untouched - confirm whether
+                # int/None values are meant to be supported on py2 as well.
+                t[k] = pyu(v)
+        return t
+
+
 # _pyustrIter wraps unicode iterator to return pyustr for each yielded character.
 cdef class _pyustrIter:
    cdef object uiter
@@ -769,6 +1021,13 @@ cdef class _UnboundMethod(object): # they removed unbound methods on py3

 # ---- misc ----

+# _strhas returns whether unicode string type has specified method.
+#
+# It is evaluated inside the bstr/ustr class bodies to define methods such as
+# casefold, isidentifier, isprintable, removeprefix and removesuffix only when
+# unicode itself provides them.
+cdef bint _strhas(str meth) except *:
+    return hasattr(unicode, meth)
+
+# _xpyu_coerce is like _pyu_coerce, but lets None through unchanged.
+cdef object _xpyu_coerce(obj):
+    if obj is None:
+        return None
+    return _pyu_coerce(obj)
+
 # _buffer_py2 returns buffer(obj) on py2 / fails on py3
 cdef object _buffer_py2(object obj):
    IF PY2:                 # cannot `if PY_MAJOR_VERSION < 3` because then cython errors

--- a/golang/golang_str_test.py
+++ b/golang/golang_str_test.py
@@ -431,6 +431,144 @@ def test_strings_index():
    assert _[3:12]  ==             b'\xb8\xd1\x80\xd1\x83 \xd0\xbc\xd0'
    assert _[1:-1:2]== b'\xbc\xb8\x80\x83\xd0\xd0\xd1'

+    # u/unicode:  index/rindex/find/rfind  return character-position
+    #             methods that accept start/stop also treat them as character position
+    #
+    # b/bytes:    index/rindex/find/rfind  return byte-position
+    #             methods that accept start/stop also treat them as byte-position
+    #
+    # b/u:        methods do not automatically coerce buffers to strings
+    class CheckOp:
+        # CheckOp(xs, x_, str2std).meth(*argv) calls xs.meth(*argv), verifies
+        # the outcome against x_.meth(*argv') - where argv' is argv with strings
+        # deep-replaced via str2std - and returns the outcome of xs.meth(*argv).
+        # Exceptions count as outcomes too: xcall turns them into return values.
+        def __init__(self, xs, x_, str2std):
+            self.xs, self.x_, self.str2std = xs, x_, str2std
+
+        def __getattr__(self, meth):
+            # assertSame verifies that two call outcomes agree: same type, and
+            # either equal values or - for exceptions - equal messages.
+            def assertSame(got, want):
+                assert type(got) is type(want)
+                if isinstance(got, Exception):
+                    assert str(got) == str(want) # ValueError('x') == ValueError('x')  is false
+                else:
+                    assert got == want
+
+            def check(*argv):
+                argv_std = deepReplaceStr(argv, self.str2std)
+                x = xcall(self.xs, meth, *argv)
+                assertSame(x, xcall(self.x_, meth, *argv_std))
+
+                # also test xs.meth(unicode|bytes|bytearray | bstr|ustr)
+                for zt in (xunicode, xbytes, xbytearray, b, u):
+                    argv_z = deepReplaceStr(argv, zt)
+                    assertSame(xcall(self.xs, meth, *argv_z), x)
+
+                # buffers should not be accepted
+                for tbuf in buftypes:
+                    replaced = [False]  # set once at least one string was replaced
+                    def asbuf(s):
+                        replaced[0] = True
+                        return tbuf(xbytes(s))
+                    argv_buf = deepReplaceStr(argv, asbuf)
+                    if not replaced[0]:
+                        continue
+                    with raises(TypeError):
+                        getattr(self.xs, meth)(*argv_buf)
+
+                return x
+            return check
+    U = CheckOp(us, u_, xunicode)
+    B = CheckOp(bs, b_, xbytes)
+
+    assert U.count("α")             == 0
+    assert B.count("α")             == 0
+    assert U.count("и")             == 2
+    assert B.count("и")             == 2
+    assert U.count("ир")            == 2
+    assert B.count("ир")            == 2
+    assert U.count("ир", 2)         == 1
+    assert B.count("ир", 2)         == 2
+    assert U.count("ир", 2, 7)      == 0
+    assert B.count("ир", 2, 7)      == 1
+
+    assert U.find("α")              == -1
+    assert B.find("α")              == -1
+    assert U.find("ир")             == 1
+    assert B.find("ир")             == 2
+    assert U.find("ир", 2)          == 6
+    assert B.find("ир", 2)          == 2
+    assert U.find("ир", 2, 7)       == -1
+    assert B.find("ир", 2, 7)       == 2
+
+    assert U.rfind("α")             == -1
+    assert B.rfind("α")             == -1
+    assert U.rfind("ир")            == 6
+    assert B.rfind("ир")            == 11
+    assert U.rfind("ир", 2)         == 6
+    assert B.rfind("ир", 2)         == 11
+    assert U.rfind("ир", 2, 7)      == -1
+    assert B.rfind("ир", 2, 7)      == 2
+
+    _ =    U.index("α");            assert isinstance(_, ValueError)
+    _ =    B.index("α");            assert isinstance(_, ValueError)
+    assert U.index("ир")            == 1
+    assert B.index("ир")            == 2
+    assert U.index("ир", 2)         == 6
+    assert B.index("ир", 2)         == 2
+    _ =    U.index("ир", 2, 7);     assert isinstance(_, ValueError)
+    assert B.index("ир", 2, 7)      == 2
+
+    _ =    U.rindex("α");           assert isinstance(_, ValueError)
+    _ =    B.rindex("α");           assert isinstance(_, ValueError)
+    assert U.rindex("ир")           == 6
+    assert B.rindex("ир")           == 11
+    assert U.rindex("ир", 2)        == 6
+    assert B.rindex("ир", 2)        == 11
+    _ =    U.rindex("ир", 2, 7);    assert isinstance(_, ValueError)
+    assert B.rindex("ир", 2, 7)     == 2
+
+    assert U.startswith("α")        == False
+    assert B.startswith("α")        == False
+    assert U.startswith("мир")      == True
+    assert B.startswith("мир")      == True
+    assert U.startswith("мир", 5)   == True
+    assert B.startswith("мир", 5)   == False
+    assert U.startswith("мир", 5, 7)== False
+    assert B.startswith("мир", 5, 7)== False
+    assert U.startswith(())         == False
+    assert B.startswith(())         == False
+    assert U.startswith(("α",))     == False
+    assert B.startswith(("α",))     == False
+    assert U.startswith(("α","β"))  == False
+    assert B.startswith(("α","β"))  == False
+    assert U.startswith(("α","β","ир"))  == False
+    assert B.startswith(("α","β","ир"))  == False
+    assert U.startswith(("α","β","мир")) == True
+    assert B.startswith(("α","β","мир")) == True
+
+    assert U.endswith("α")          == False
+    assert B.endswith("α")          == False
+    assert U.endswith("мир")        == True
+    assert B.endswith("мир")        == True
+    assert U.endswith("мир", 2)     == True
+    assert B.endswith("мир", 2)     == True
+    assert U.endswith("мир", 2, 7)  == False
+    assert B.endswith("мир", 2, 7)  == False
+    assert U.endswith("мир", None, 3) == True
+    assert B.endswith("мир", None, 3) == False
+    assert U.endswith("мир", None, 6) == False
+    assert B.endswith("мир", None, 6) == True
+    assert U.endswith(())           == False
+    assert B.endswith(())           == False
+    assert U.endswith(("α",))       == False
+    assert B.endswith(("α",))       == False
+    assert U.endswith(("α","β"))    == False
+    assert B.endswith(("α","β"))    == False
+    assert U.endswith(("α","β","ир"))  == True
+    assert B.endswith(("α","β","ир"))  == True
+    assert U.endswith(("α","β","мир")) == True
+    assert B.endswith(("α","β","мир")) == True
+

 # verify strings iteration.
 def test_strings_iter():
@@ -574,6 +712,9 @@ def test_strings_ops2(tx, ty):

 # verify string operations like `x + y` for x being bstr/ustr and y being a
 # type unsupported for coercion.
+#
+# NOTE string methods, like .join and .startswith, are verified to reject
+# buffers in test_strings_methods and test_strings_index.
 @mark.parametrize('tx', (bstr, ustr))
 @mark.parametrize('ty', buftypes)
 def test_strings_ops2_bufreject(tx, ty):
@@ -582,6 +723,7 @@ def test_strings_ops2_bufreject(tx, ty):

    with raises(TypeError):     x + y
    with raises(TypeError):     x * y
+    with raises(TypeError):     y in x

    assert  (x == y) is False           # see test_strings_ops2_eq_any
    assert  (x != y) is True
@@ -594,6 +736,10 @@ def test_strings_ops2_bufreject(tx, ty):
    with raises(TypeError):     y + x
    with raises(TypeError):     y * x

+    # `x in y` does not raise: y is considered to be generic sequence without
+    # __contains__, and so python transforms `x in y` into `x in list(y)`.
+    #with raises(TypeError):     x in y
+
    # `y > x` does not raise when x is bstr (= provides buffer):
    y == x  # not raises TypeError  -  see test_strings_ops2_eq_any
    y != x  #
@@ -658,6 +804,287 @@ def test_strings_print():
    assertDoc(outok, stdout)


+# verify methods of bstr/ustr.
+def test_strings_methods():
+    # checkop verifies that `s.meth(*argv, **kw)` gives the same result for s,
+    # argv and kw being various combinations of unicode,bstr,ustr, bytes/bytearray.
+    def checkop(s, meth, *argv, **kw):
+        assert type(s) is str
+        ok = kw.pop('ok', None)
+        bs = b(s)
+        us = u(s)
+        # verify {str,bstr,ustr}.meth with str arguments
+        # on py2 use unicode(s/args) because e.g. 'мир'.capitalize()
+        # gives correct result only on unicode, not regular str.
+        argv_unicode = deepReplaceStr(argv, xunicode)
+        kw_unicode   = deepReplaceStr(kw,   xunicode)
+        if six.PY3:
+            r = xcall(s, meth, *argv, **kw)
+        else:
+            s = xunicode(s)
+            r = xcall(s, meth, *argv_unicode, **kw_unicode)
+
+        # we provide fallback implementations on e.g. py2
+        if ok is not None:
+            if six.PY2:
+                ok = xunicode(ok)
+            if isinstance(r, NotImplementedError):
+                r = ok
+            else:
+                assert r == ok
+
+        assert type(s) is unicode
+        br = xcall(bs, meth, *argv, **kw)
+        ur = xcall(us, meth, *argv, **kw)
+
+        def assertDeepEQ(a, b, bstrtype):
+            assert not isinstance(a, (bstr, ustr))
+            if type(a) is unicode:
+                assert type(b) is bstrtype
+                assert a == b
+                return
+
+            assert type(b) is type(a)
+
+            if isinstance(a, (list, tuple)):
+                assert len(a) == len(b)
+                for i in range(len(a)):
+                    assertDeepEQ(a[i], b[i], bstrtype)
+            elif isinstance(a, dict):
+                assert len(a) == len(b)
+                for k, v in a.items():
+                    v_ = b[k]
+                    assertDeepEQ(v, v_, bstrtype)
+            elif isinstance(a, Exception):
+                assertDeepEQ(a.args, b.args, type(''))  # NOTE bstr is not raised in exceptions
+            else:
+                assert a == b
+
+        assertDeepEQ(r, br, bstr)
+        assertDeepEQ(r, ur, ustr)
+
+        # verify {bstr,ustr}.meth with arguments being b/u instead of str
+        #
+        # NOTE str.meth does not work with b - on py3 e.g. unicode.center
+        # checks fillchar to be instance of unicode.
+        argv_b = deepReplaceStr(argv, b)
+        argv_u = deepReplaceStr(argv, u)
+        kw_b   = deepReplaceStr(kw,   b)
+        kw_u   = deepReplaceStr(kw,   u)
+
+        br_b = xcall(bs, meth, *argv_b, **kw_b)
+        br_u = xcall(bs, meth, *argv_u, **kw_u)
+        ur_b = xcall(us, meth, *argv_b, **kw_b)
+        ur_u = xcall(us, meth, *argv_u, **kw_u)
+
+        assertDeepEQ(r, br_b, bstr)
+        assertDeepEQ(r, br_u, bstr)
+        assertDeepEQ(r, ur_b, ustr)
+        assertDeepEQ(r, ur_u, ustr)
+
+        # verify {bstr,ustr}.meth with arguments being bytes/unicode/bytearray instead of str
+        argv_bytes = deepReplaceStr(argv, xbytes)
+        argv_barr  = deepReplaceStr2Bytearray(argv)
+        kw_bytes   = deepReplaceStr(kw,   xbytes)
+        kw_barr    = deepReplaceStr2Bytearray(kw)
+
+        br_bytes   = xcall(bs, meth, *argv_bytes,   **kw_bytes)
+        br_unicode = xcall(bs, meth, *argv_unicode, **kw_unicode)
+        br_barr    = xcall(bs, meth, *argv_barr,    **kw_barr)
+        ur_bytes   = xcall(us, meth, *argv_bytes,   **kw_bytes)
+        ur_unicode = xcall(us, meth, *argv_unicode, **kw_unicode)
+        ur_barr    = xcall(us, meth, *argv_barr,    **kw_barr)
+
+        assertDeepEQ(r, br_bytes,   bstr) # everything is converted to bstr, not bytes
+        assertDeepEQ(r, br_unicode, bstr) # ----//----                       not unicode
+        assertDeepEQ(r, br_barr,    bstr) # ----//----                       not bytearray
+        assertDeepEQ(r, ur_bytes,   ustr) # ----//----              to ustr
+        assertDeepEQ(r, ur_unicode, ustr)
+        assertDeepEQ(r, ur_barr,    ustr)
+
+        # verify that {bstr,ustr}.meth does not implicitly convert buffer to string
+        if not hasattr(bs, meth):  # e.g. bstr.removeprefix on py2
+            assert not hasattr(us, meth)
+            return
+
+        for tbuf in buftypes:
+            _bufview = [False]
+            def bufview(s):
+                _bufview[0] = True
+                return tbuf(xbytes(s))
+            argv_buf    = deepReplaceStr(argv, bufview)
+            argv_hasbuf = _bufview[0]
+
+            _bufview[0] = False
+            kw_buf      = deepReplaceStr(kw,   bufview)
+            kw_hasbuf   = _bufview[0]
+
+            if argv_hasbuf:
+                with raises(TypeError):
+                    getattr(bs, meth)(*argv_buf, **kw)
+                with raises(TypeError):
+                    getattr(us, meth)(*argv_buf, **kw)
+            if kw_hasbuf:
+                with raises(TypeError):
+                    getattr(bs, meth)(*argv, **kw_buf)
+                with raises(TypeError):
+                    getattr(us, meth)(*argv, **kw_buf)
+
+
+    # Verifier provides syntactic sugar for checkop: V.attr returns wrapper around checkop(V.text, attr).
+    class Verifier:
+        def __init__(self, text):
+            self.text = text
+        def __getattr__(self, meth):
+            def _(*argv, **kw):
+                checkop(self.text, meth, *argv, **kw)
+            return _
+
+    _ = Verifier
+
+    _("миру мир").__contains__("ру")
+    _("миру мир").__contains__("α")
+    _("мир").capitalize()
+    _("МиР").casefold()
+    _("мир").center(10)
+    _("мир").center(10, "ж")
+    # count, endswith       - tested in test_strings_index
+    _("миру\tмир").expandtabs()
+    _("миру\tмир").expandtabs(4)
+    # find, index           - tested in test_strings_index
+    _("мир").isalnum()
+    _("мир!").isalnum()
+    _("мир").isalpha()
+    _("мир!").isalpha()
+    _("мир").isascii()
+    _("hello").isascii()
+    _("hellЫ").isascii()
+    _("123 мир").isdecimal()
+    _("123 q").isdecimal()
+    _("123").isdecimal()
+    _("мир").isdigit()
+    _("123 мир").isdigit()
+    _("123 q").isdigit()
+    _("123").isdigit()
+    _("٤").isdigit()            # arabic 4
+    _("мир").isidentifier()
+    _("мир$").isidentifier()
+    _("мир").islower()
+    _("Мир").islower()
+    _("мир").isnumeric()
+    _("123").isnumeric()
+    _("0x123").isnumeric()
+    _("мир").isprintable()
+    _("\u2009").isspace()       # thin space
+    _("  ").isspace()
+    _("мир").isspace()
+    _("мир").istitle()
+    _("Мир").istitle()
+    _(" мир ").join(["да", "май", "труд"])
+    _("мир").ljust(10)
+    _("мир").ljust(10, 'ж')
+    _("МиР").lower()
+    _("\u2009 мир").lstrip()
+    _("\u2009 мир\u2009 ").lstrip()
+    _("мммир").lstrip('ми')
+    _("миру мир").partition('ру')
+    _("миру мир").partition('ж')
+    _("миру мир").removeprefix("мир")
+    _("миру мир").removesuffix("мир")
+    _("миру мир").replace("ир", "ж")
+    _("миру мир").replace("ир", "ж", 1)
+    # rfind, rindex         - tested in test_strings_index
+    _("мир").rjust(10)
+    _("мир").rjust(10, 'ж')
+    _("миру мир").rpartition('ру')
+    _("миру мир").rpartition('ж')
+    _("мир").rsplit()
+    _("привет мир").rsplit()
+    _("привет\u2009мир").rsplit()
+    _("привет мир").rsplit("и")
+    _("привет мир").rsplit("и", 1)
+    _("мир \u2009").rstrip()
+    _(" мир \u2009").rstrip()
+    _("мируу").rstrip('ру')
+    _("мир").split()
+    _("привет мир").split()
+    _("привет\u2009мир").split()
+    _("привет мир").split("и")
+    _("привет мир").split("и", 1)
+    _("мир").splitlines()
+    _("миру\nмир").splitlines()
+    _("миру\nмир").splitlines(True)
+    _("миру\nмир\n").splitlines(True)
+    _("мир\nтруд\nмай\n").splitlines()
+    _("мир\nтруд\nмай\n").splitlines(True)
+    # startswith            - tested in test_strings_index
+    _("\u2009 мир \u2009").strip()
+    _("миру мир").strip('мир')
+    _("МиР").swapcase()
+    _("МиР").title()
+    _("мир").translate({ord(u'м'):ord(u'и'), ord(u'и'):'я', ord(u'р'):None})
+    _("МиР").upper()
+    _("мир").zfill(10)
+    _("123").zfill(10)
+
+
+# verify bstr.translate in bytes mode
+#
+# every call is made both on bstr and on the plain bytes it was built from;
+# the two results must compare equal.
+def test_strings_bstr_translate_bytemode():
+    bs = b('мир')
+    b_ = xbytes('мир')
+
+    def _(*argv):
+        rb  = bs.translate(*argv)
+        rok = b_.translate(*argv)
+        assert rb == rok
+
+    # no table: only deletion
+    _(None)
+    _(None, b'')
+    _(None, b'\xd1')
+    _(None, b'\x80\xd1')
+
+    # identity table with two bytes remapped: 0x80 -> 0x81 and 0xbc -> 0xbd
+    t = bytearray(range(0x100))
+    t[0x80] = 0x81
+    t[0xbc] = 0xbd
+    t = bytes(t)
+    _(t)
+    _(t, b'')
+    # table and deletion combined
+    _(t, b'\xd1')
+    _(t, b'\x80\xd1')
+
+
+# verify bstr/ustr maketrans
+#
+# for every case below bstr.maketrans and ustr.maketrans must return the same
+# table as unicode.maketrans, both for the original arguments and for the
+# arguments with strings deep-replaced by b(...) and by u(...).
+def test_strings_maketrans():
+    # _ checks one case: argv are maketrans arguments, ok is the expected table.
+    def _(argv, ok):
+        rok = xcall(unicode, 'maketrans', *argv)
+        # py2 unicode does not have maketrans
+        if six.PY2 and isinstance(rok, NotImplementedError):
+            rok = ok
+        assert rok == ok
+
+        # xcall returns a raised exception instead of propagating it, so a
+        # failing maketrans shows up as a mismatch in the asserts below.
+        rb  = xcall(bstr,    'maketrans', *argv)
+        ru  = xcall(ustr,    'maketrans', *argv)
+
+        argv_b = deepReplaceStr(argv, b)
+        argv_u = deepReplaceStr(argv, u)
+        rb_b = xcall(bstr, 'maketrans', *argv_b)
+        rb_u = xcall(bstr, 'maketrans', *argv_u)
+        ru_b = xcall(ustr, 'maketrans', *argv_b)
+        ru_u = xcall(ustr, 'maketrans', *argv_u)
+
+        assert rok == rb
+        assert rok == ru
+        assert rok == rb_b
+        assert rok == rb_u
+        assert rok == ru_b
+        assert rok == ru_u
+
+    # dict with int keys, dict with character keys, then the (x,y) and (x,y,z) forms
+    _( ({100:'ы', 200:'я'},)        , {100:u'ы',        200:u'я'} )
+    _( ({'α':'ы', 'β':'я'},)        , {ord(u'α'):u'ы',  ord(u'β'):u'я'} )
+    _( ('αβ', 'ыя')                 , {ord(u'α'):ord(u'ы'),  ord(u'β'):ord(u'я')} )
+    _( ('αβ', 'ыя', 'πρ')           , {ord(u'α'):ord(u'ы'),  ord(u'β'):ord(u'я'),
+                                       ord(u'π'):None,       ord(u'ρ'):None} )
+
 # verify behaviour of bstr|ustr subclasses.
 @mark.parametrize('tx', (unicode, bstr, ustr))
 def test_strings_subclasses(tx):
@@ -1398,6 +1825,18 @@ def tbu(typ):
        return ustr
    raise AssertionError("invalid type %r" % typ)

+# xcall invokes `obj.meth(*argv, **kw)` and hands back whatever comes out of it:
+# the return value on success, or the exception instance if the call raised.
+# if obj has no attribute named meth, NotImplementedError(meth) is returned.
+def xcall(obj, meth, *argv, **kw):
+    if hasattr(obj, meth):
+        f = getattr(obj, meth)
+        try:
+            result = f(*argv, **kw)
+        except Exception as exc:
+            result = exc
+        return result
+    return NotImplementedError(meth)
+
 # isascii returns whether bytes/unicode x consists of only ASCII characters.
 def isascii(x):
    if isinstance(x, unicode):