Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cython
Commits
c6fe84dc
Commit
c6fe84dc
authored
Feb 20, 2013
by
scoder
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #186 from intellimath/master
Add unicode slicing support
parents
de667b62
7224f33a
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
246 additions
and
24 deletions
+246
-24
Cython/Compiler/ExprNodes.py
Cython/Compiler/ExprNodes.py
+50
-15
Cython/Utility/StringTools.c
Cython/Utility/StringTools.c
+44
-0
tests/run/bytesmethods.pyx
tests/run/bytesmethods.pyx
+6
-6
tests/run/slice_charptr.pyx
tests/run/slice_charptr.pyx
+3
-3
tests/run/unicode_slicing.pyx
tests/run/unicode_slicing.pyx
+143
-0
No files found.
Cython/Compiler/ExprNodes.py
View file @
c6fe84dc
...
@@ -3470,6 +3470,8 @@ class SliceIndexNode(ExprNode):
...
@@ -3470,6 +3470,8 @@ class SliceIndexNode(ExprNode):
if
base_type
.
is_builtin_type
:
if
base_type
.
is_builtin_type
:
# slicing builtin types returns something of the same type
# slicing builtin types returns something of the same type
self
.
type
=
base_type
self
.
type
=
base_type
self
.
base
=
self
.
base
.
as_none_safe_node
(
"'NoneType' object is not subscriptable"
)
c_int
=
PyrexTypes
.
c_py_ssize_t_type
c_int
=
PyrexTypes
.
c_py_ssize_t_type
if
self
.
start
:
if
self
.
start
:
self
.
start
=
self
.
start
.
coerce_to
(
c_int
,
env
)
self
.
start
=
self
.
start
.
coerce_to
(
c_int
,
env
)
...
@@ -3486,6 +3488,11 @@ class SliceIndexNode(ExprNode):
...
@@ -3486,6 +3488,11 @@ class SliceIndexNode(ExprNode):
error
(
self
.
pos
,
error
(
self
.
pos
,
"Slicing is not currently supported for '%s'."
%
self
.
type
)
"Slicing is not currently supported for '%s'."
%
self
.
type
)
return
return
base_result
=
self
.
base
.
result
()
result
=
self
.
result
()
start_code
=
self
.
start_code
()
stop_code
=
self
.
stop_code
()
if
self
.
base
.
type
.
is_string
:
if
self
.
base
.
type
.
is_string
:
base_result
=
self
.
base
.
result
()
base_result
=
self
.
base
.
result
()
if
self
.
base
.
type
!=
PyrexTypes
.
c_char_ptr_type
:
if
self
.
base
.
type
!=
PyrexTypes
.
c_char_ptr_type
:
...
@@ -3493,27 +3500,37 @@ class SliceIndexNode(ExprNode):
...
@@ -3493,27 +3500,37 @@ class SliceIndexNode(ExprNode):
if
self
.
stop
is
None
:
if
self
.
stop
is
None
:
code
.
putln
(
code
.
putln
(
"%s = PyBytes_FromString(%s + %s); %s"
%
(
"%s = PyBytes_FromString(%s + %s); %s"
%
(
self
.
result
()
,
result
,
base_result
,
base_result
,
s
elf
.
start_code
()
,
s
tart_code
,
code
.
error_goto_if_null
(
self
.
result
()
,
self
.
pos
)))
code
.
error_goto_if_null
(
result
,
self
.
pos
)))
else
:
else
:
code
.
putln
(
code
.
putln
(
"%s = PyBytes_FromStringAndSize(%s + %s, %s - %s); %s"
%
(
"%s = PyBytes_FromStringAndSize(%s + %s, %s - %s); %s"
%
(
self
.
result
(),
self
.
result
(),
base_result
,
base_result
,
self
.
start_code
(),
start_code
,
self
.
stop_code
(),
stop_code
,
self
.
start_code
(),
start_code
,
code
.
error_goto_if_null
(
self
.
result
(),
self
.
pos
)))
code
.
error_goto_if_null
(
result
,
self
.
pos
)))
elif
self
.
base
.
type
is
unicode_type
:
code
.
globalstate
.
use_utility_code
(
UtilityCode
.
load_cached
(
"PyUnicode_Substring"
,
"StringTools.c"
))
code
.
putln
(
"%s = __Pyx_PyUnicode_Substring(%s, %s, %s); %s"
%
(
result
,
base_result
,
start_code
,
stop_code
,
code
.
error_goto_if_null
(
result
,
self
.
pos
)))
else
:
else
:
code
.
putln
(
code
.
putln
(
"%s = __Pyx_PySequence_GetSlice(%s, %s, %s); %s"
%
(
"%s = __Pyx_PySequence_GetSlice(%s, %s, %s); %s"
%
(
self
.
result
()
,
result
,
self
.
base
.
py_result
(),
self
.
base
.
py_result
(),
s
elf
.
start_code
()
,
s
tart_code
,
s
elf
.
stop_code
()
,
s
top_code
,
code
.
error_goto_if_null
(
self
.
result
()
,
self
.
pos
)))
code
.
error_goto_if_null
(
result
,
self
.
pos
)))
code
.
put_gotref
(
self
.
py_result
())
code
.
put_gotref
(
self
.
py_result
())
def
generate_assignment_code
(
self
,
rhs
,
code
):
def
generate_assignment_code
(
self
,
rhs
,
code
):
...
@@ -4958,10 +4975,8 @@ class AttributeNode(ExprNode):
...
@@ -4958,10 +4975,8 @@ class AttributeNode(ExprNode):
def
generate_result_code
(
self
,
code
):
def
generate_result_code
(
self
,
code
):
if
self
.
is_py_attr
:
if
self
.
is_py_attr
:
code
.
globalstate
.
use_utility_code
(
UtilityCode
.
load_cached
(
"PyObjectGetAttrStr"
,
"ObjectHandling.c"
))
code
.
putln
(
code
.
putln
(
'%s =
__Pyx_PyObject_GetAttrS
tr(%s, %s); %s'
%
(
'%s =
PyObject_GetAt
tr(%s, %s); %s'
%
(
self
.
result
(),
self
.
result
(),
self
.
obj
.
py_result
(),
self
.
obj
.
py_result
(),
code
.
intern_identifier
(
self
.
attribute
),
code
.
intern_identifier
(
self
.
attribute
),
...
@@ -10229,13 +10244,33 @@ class DocstringRefNode(ExprNode):
...
@@ -10229,13 +10244,33 @@ class DocstringRefNode(ExprNode):
code
.
put_gotref
(
self
.
result
())
code
.
put_gotref
(
self
.
result
())
#------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------
#
#
# Runtime support code
# Runtime support code
#
#
#------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------
get_name_interned_utility_code
=
UtilityCode
.
load
(
"GetGlobalName"
,
"ObjectHandling.c"
)
get_name_interned_utility_code
=
UtilityCode
(
proto
=
"""
static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/
"""
,
impl
=
"""
static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) {
PyObject *result;
result = PyObject_GetAttr(dict, name);
if (!result) {
if (dict != %(BUILTINS)s) {
PyErr_Clear();
result = PyObject_GetAttr(%(BUILTINS)s, name);
}
if (!result) {
PyErr_SetObject(PyExc_NameError, name);
}
}
return result;
}
"""
%
{
'BUILTINS'
:
Naming
.
builtins_cname
})
#------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------
...
...
Cython/Utility/StringTools.c
View file @
c6fe84dc
...
@@ -374,3 +374,47 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
...
@@ -374,3 +374,47 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
return
PyUnicode_Decode
(
cstring
,
length
,
encoding
,
errors
);
return
PyUnicode_Decode
(
cstring
,
length
,
encoding
,
errors
);
}
}
}
}
/////////////// PyUnicode_Substring.proto ///////////////
static
CYTHON_INLINE
PyObject
*
__Pyx_PyUnicode_Substring
(
PyObject
*
text
,
Py_ssize_t
start
,
Py_ssize_t
stop
);
/////////////// PyUnicode_Substring ///////////////
/*
 * Build a new unicode object from the code points [start, stop) of `text`.
 * Both indices must already be normalised to 0 <= start <= stop <= length;
 * the arguments are parenthesised so that expressions can be passed safely.
 */
#if CYTHON_PEP393_ENABLED
#define __Pyx_PyUnicode_SUBSTRING(text, start, stop) \
    PyUnicode_FromKindAndData(PyUnicode_KIND(text), \
        PyUnicode_1BYTE_DATA(text) + (start) * PyUnicode_KIND(text), (stop) - (start))
#else
#define __Pyx_PyUnicode_SUBSTRING(text, start, stop) \
    PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text) + (start), (stop) - (start))
#endif

/*
 * Return text[start:stop] for a unicode object.
 *
 * A negative start or stop is offset by the length of `text`; start is then
 * clamped to 0 and stop is clamped to the length. If the resulting range is
 * empty (or inverted), an empty unicode string is returned.
 *
 * Returns NULL if PyUnicode_READY() fails or if the result cannot be created.
 * NOTE(review): `text` is dereferenced unconditionally, so it must not be
 * None/NULL here; presumably the generated code checks this before the call.
 */
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
            PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
    Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
    /* The string must be in canonical (ready) form before KIND/DATA access. */
    if (PyUnicode_READY(text) == -1) return NULL;
    length = PyUnicode_GET_LENGTH(text);
#else
    length = PyUnicode_GET_SIZE(text);
#endif
    if (start < 0) {
        start += length;
        if (start < 0)
            start = 0;
    }
    if (stop < 0)
        stop += length;
    if (stop > length)
        stop = length;
    /* A stop that is still negative, or a start beyond stop, yields length <= 0. */
    length = stop - start;
    if (length <= 0)
        return PyUnicode_FromUnicode(NULL, 0);
    return (PyObject*)__Pyx_PyUnicode_SUBSTRING(text, start, stop);
}
\ No newline at end of file
tests/run/bytesmethods.pyx
View file @
c6fe84dc
...
@@ -133,13 +133,13 @@ def bytes_decode(bytes s, start=None, stop=None):
...
@@ -133,13 +133,13 @@ def bytes_decode(bytes s, start=None, stop=None):
AttributeError: 'NoneType' object has no attribute 'decode'
AttributeError: 'NoneType' object has no attribute 'decode'
>>> print(bytes_decode(None, 1))
>>> print(bytes_decode(None, 1))
Traceback (most recent call last):
Traceback (most recent call last):
AttributeError: 'NoneType' object has no attribute 'decode'
TypeError: 'NoneType' object is not subscriptable
>>> print(bytes_decode(None, None, 1))
>>> print(bytes_decode(None, None, 1))
Traceback (most recent call last):
Traceback (most recent call last):
AttributeError: 'NoneType' object has no attribute 'decode'
TypeError: 'NoneType' object is not subscriptable
>>> print(bytes_decode(None, 0, 1))
>>> print(bytes_decode(None, 0, 1))
Traceback (most recent call last):
Traceback (most recent call last):
AttributeError: 'NoneType' object has no attribute 'decode'
TypeError: 'NoneType' object is not subscriptable
"""
"""
if
start
is
None
:
if
start
is
None
:
if
stop
is
None
:
if
stop
is
None
:
...
@@ -173,13 +173,13 @@ def bytes_decode_unbound_method(bytes s, start=None, stop=None):
...
@@ -173,13 +173,13 @@ def bytes_decode_unbound_method(bytes s, start=None, stop=None):
TypeError: descriptor 'decode' requires a 'bytes' object but received a 'NoneType'
TypeError: descriptor 'decode' requires a 'bytes' object but received a 'NoneType'
>>> print(bytes_decode_unbound_method(None, 1))
>>> print(bytes_decode_unbound_method(None, 1))
Traceback (most recent call last):
Traceback (most recent call last):
TypeError:
descriptor 'decode' requires a 'bytes' object but received a 'NoneType'
TypeError:
'NoneType' object is not subscriptable
>>> print(bytes_decode_unbound_method(None, None, 1))
>>> print(bytes_decode_unbound_method(None, None, 1))
Traceback (most recent call last):
Traceback (most recent call last):
TypeError:
descriptor 'decode' requires a 'bytes' object but received a 'NoneType'
TypeError:
'NoneType' object is not subscriptable
>>> print(bytes_decode_unbound_method(None, 0, 1))
>>> print(bytes_decode_unbound_method(None, 0, 1))
Traceback (most recent call last):
Traceback (most recent call last):
TypeError:
descriptor 'decode' requires a 'bytes' object but received a 'NoneType'
TypeError:
'NoneType' object is not subscriptable
"""
"""
if
start
is
None
:
if
start
is
None
:
if
stop
is
None
:
if
stop
is
None
:
...
...
tests/run/slice_charptr.pyx
View file @
c6fe84dc
__doc__
=
u"""
__doc__
=
u"""
>>> do_slice(b'abcdef', 2, 3)
>>> do_slice(b'abcdef', 2, 3)
(b'c', b'cdef', b'ab', b'abcdef')
(b'c', b'cdef', b'ab', b'abcdef'
, b'cdef', b'ab', b'abcdef'
)
>>> do_slice(b'abcdef', 0, 5)
>>> do_slice(b'abcdef', 0, 5)
(b'abcde', b'abcdef', b'', b'abcdef')
(b'abcde', b'abcdef', b'', b'abcdef'
, b'abcdef', b'', b'abcdef'
)
"""
"""
import
sys
import
sys
...
@@ -12,5 +12,5 @@ if sys.version_info[0] < 3:
...
@@ -12,5 +12,5 @@ if sys.version_info[0] < 3:
def
do_slice
(
s
,
int
i
,
int
j
):
def
do_slice
(
s
,
int
i
,
int
j
):
cdef
char
*
ss
=
s
cdef
char
*
ss
=
s
return
ss
[
i
:
j
],
ss
[
i
:],
ss
[:
i
],
ss
[:]
return
ss
[
i
:
j
],
ss
[
i
:],
ss
[:
i
],
ss
[:]
,
ss
[
i
:
None
],
ss
[
None
:
i
],
ss
[
None
:
None
]
tests/run/unicode_slicing.pyx
0 → 100644
View file @
c6fe84dc
# coding: utf-8
__doc__
=
u"""
>>> do_slice1(u'abcdef', 2, 3)
c
>>> do_slice2(u'abcdef', 2, 3)
cdef
>>> do_slice3(u'abcdef', 2, 3)
ab
>>> do_slice4(u'abcdef', 2, 3)
abcdef
>>> do_slice5(u'abcdef', 2, 3)
cdef
>>> do_slice6(u'abcdef', 2, 3)
ab
>>> do_slice7(u'abcdef', 2, 3)
abcdef
>>> do_slice1(u'abcdef', 2, 10)
cdef
>>> do_slice2(u'abcdef', 2, 10)
cdef
>>> do_slice3(u'abcdef', 2, 10)
ab
>>> do_slice4(u'abcdef', 2, 10)
abcdef
>>> do_slice1(u'abcdef', 0, 5)
abcde
>>> do_slice2(u'abcdef', 0, 5)
abcdef
>>> do_slice3(u'abcdef', 0, 5)
<BLANKLINE>
>>> do_slice4(u'abcdef', 0, 5)
abcdef
>>> do_slice5(u'abcdef', 0, 5)
abcdef
>>> do_slice6(u'abcdef', 0, 5)
<BLANKLINE>
>>> do_slice7(u'abcdef', 0, 5)
abcdef
>>> do_slice1(u'abcdef', -6, -1)
abcde
>>> do_slice2(u'abcdef', -6, -1)
abcdef
>>> do_slice3(u'abcdef', -6, -1)
<BLANKLINE>
>>> do_slice4(u'abcdef', -6, -1)
abcdef
>>> do_slice5(u'abcdef', -6, -1)
abcdef
>>> do_slice6(u'abcdef', -6, -1)
<BLANKLINE>
>>> do_slice7(u'abcdef', -6, -1)
abcdef
>>> do_slice1(u'aАbБcСdДeЕfФ', 2, 8)
bБcСdД
>>> do_slice2(u'aАbБcСdДeЕfФ', 2, 8)
bБcСdДeЕfФ
>>> do_slice3(u'aАbБcСdДeЕfФ', 2, 8)
aА
>>> do_slice4(u'aАbБcСdДeЕfФ', 2, 8)
aАbБcСdДeЕfФ
>>> do_slice5(u'aАbБcСdДeЕfФ', 2, 8)
bБcСdДeЕfФ
>>> do_slice6(u'aАbБcСdДeЕfФ', 2, 8)
aА
>>> do_slice7(u'aАbБcСdДeЕfФ', 2, 8)
aАbБcСdДeЕfФ
>>> do_slice1(u'АБСДЕФ', 2, 4)
СД
>>> do_slice2(u'АБСДЕФ', 2, 4)
СДЕФ
>>> do_slice3(u'АБСДЕФ', 2, 4)
АБ
>>> do_slice4(u'АБСДЕФ', 2, 4)
АБСДЕФ
>>> do_slice5(u'АБСДЕФ', 2, 4)
СДЕФ
>>> do_slice6(u'АБСДЕФ', 2, 4)
АБ
>>> do_slice7(u'АБСДЕФ', 2, 4)
АБСДЕФ
>>> do_slice1(u'АБСДЕФ', -4, -2)
СД
>>> do_slice2(u'АБСДЕФ', -4, -2)
СДЕФ
>>> do_slice3(u'АБСДЕФ', -4, -2)
АБ
>>> do_slice4(u'АБСДЕФ', -4, -2)
АБСДЕФ
>>> do_slice5(u'АБСДЕФ', -4, -2)
СДЕФ
>>> do_slice6(u'АБСДЕФ', -4, -2)
АБ
>>> do_slice7(u'АБСДЕФ', -4, -2)
АБСДЕФ
>>> do_slice1(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice2(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice3(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice4(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice5(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice6(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice7(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
"""
import
sys
# On Python 3, rewrite the module docstring so that the u'' prefixes following
# "(" or a space are dropped from the doctests before they are run.
if sys.version_info[0] >= 3:
    __doc__ = __doc__.replace(u"(u'", u"('").replace(u" u'", u" '")
def do_slice1(unicode s, int i, int j):
    # Explicit start and stop: s[i:j].
    piece = s[i:j]
    print(piece)
def do_slice2(unicode s, int i, int j):
    # Start only, stop omitted: s[i:].  `j` is accepted but not used.
    piece = s[i:]
    print(piece)
def do_slice3(unicode s, int i, int j):
    # Stop only, start omitted: s[:i].  `j` is accepted but not used.
    piece = s[:i]
    print(piece)
def do_slice4(unicode s, int i, int j):
    # Both bounds omitted: s[:].  `i` and `j` are accepted but not used.
    piece = s[:]
    print(piece)
def do_slice5(unicode s, int i, int j):
    # Explicit None as the stop bound: s[i:None].  `j` is accepted but not used.
    piece = s[i:None]
    print(piece)
def do_slice6(unicode s, int i, int j):
    # Explicit None as the start bound: s[None:i].  `j` is accepted but not used.
    piece = s[None:i]
    print(piece)
def do_slice7(unicode s, int i, int j):
    # Explicit None for both bounds: s[None:None].  `i` and `j` are not used.
    piece = s[None:None]
    print(piece)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment