Commit 376e8e77 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge branch 'unicode'

This commit set adds the unicode type, as well as the ability to
parse unicode literals such as u"\u0180".

There's still a lot more that needs to be added; for instance,
we don't currently support printing unicode values to stdout.
parents 077fffc6 bd80565f
......@@ -292,8 +292,8 @@ STDLIB_RELEASE_OBJS := stdlib.release.bc.o
ASM_SRCS := $(wildcard src/runtime/*.S)
STDMODULE_SRCS := errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c _codecsmodule.c socketmodule.c $(EXTRA_STDMODULE_SRCS)
# CPython Objects/ and Python/ sources compiled into the runtime; each list can be
# extended from outside through the matching EXTRA_* variable.
STDOBJECT_SRCS := structseq.c capsule.c stringobject.c exceptions.c unicodeobject.c unicodectype.c bytearrayobject.c bytes_methods.c $(EXTRA_STDOBJECT_SRCS)
STDPYTHON_SRCS := pyctype.c getargs.c formatter_string.c pystrtod.c dtoa.c formatter_unicode.c $(EXTRA_STDPYTHON_SRCS)
FROM_CPYTHON_SRCS := $(addprefix from_cpython/Modules/,$(STDMODULE_SRCS)) $(addprefix from_cpython/Objects/,$(STDOBJECT_SRCS)) $(addprefix from_cpython/Python/,$(STDPYTHON_SRCS))
# The stdlib objects have slightly longer dependency chains,
......
......@@ -18,10 +18,10 @@ add_custom_target(copy_stdlib ALL DEPENDS ${STDLIB_TARGETS})
file(GLOB_RECURSE STDMODULE_SRCS Modules errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c _codecsmodule.c socketmodule.c)
# compile specified files in from_cpython/Objects
# (single glob call: an earlier call writing the same variable would only be overwritten)
file(GLOB_RECURSE STDOBJECT_SRCS Objects structseq.c capsule.c stringobject.c exceptions.c unicodeobject.c unicodectype.c bytearrayobject.c bytes_methods.c)
# compile specified files in from_cpython/Python
# (single glob call: an earlier call writing the same variable would only be overwritten)
file(GLOB_RECURSE STDPYTHON_SRCS Python getargs.c pyctype.c formatter_string.c pystrtod.c dtoa.c formatter_unicode.c)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-missing-field-initializers -Wno-tautological-compare -Wno-type-limits")
add_library(FROM_CPYTHON OBJECT ${STDMODULE_SRCS} ${STDOBJECT_SRCS} ${STDPYTHON_SRCS})
......@@ -58,7 +58,9 @@
#include "complexobject.h"
#endif
#include "stringobject.h"
#include "bufferobject.h"
#include "bytesobject.h"
#include "bytearrayobject.h"
#include "listobject.h"
#include "dictobject.h"
#include "tupleobject.h"
......
......@@ -1184,8 +1184,8 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
*/
/* Pyston change: obtain the item array through the PyList_Items /
   PyTuple_Items accessor functions instead of reading the ob_item field
   of the list/tuple structs directly. */
#define PySequence_Fast_ITEMS(sf) \
    (PyList_Check(sf) ? (PyList_Items(sf)) \
                      : (PyTuple_Items(sf)))
/* Return a pointer to the underlying item array for
an object returned by PySequence_Fast */
......
// This file is originally from CPython 2.7, with modifications for Pyston
/* Buffer object interface */
/* Note: the object's structure is private */
#ifndef Py_BUFFEROBJECT_H
#define Py_BUFFEROBJECT_H
#ifdef __cplusplus
extern "C" {
#endif
PyAPI_DATA(PyTypeObject) PyBuffer_Type;
#define PyBuffer_Check(op) (Py_TYPE(op) == &PyBuffer_Type)
#define Py_END_OF_BUFFER (-1)
PyAPI_FUNC(PyObject *) PyBuffer_FromObject(PyObject *base,
Py_ssize_t offset, Py_ssize_t size) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteObject(PyObject *base,
Py_ssize_t offset,
Py_ssize_t size) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyBuffer_FromMemory(void *ptr, Py_ssize_t size) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyBuffer_New(Py_ssize_t size) PYSTON_NOEXCEPT;
#ifdef __cplusplus
}
#endif
#endif /* !Py_BUFFEROBJECT_H */
// This file is originally from CPython 2.7, with modifications for Pyston
/* ByteArray object interface */
#ifndef Py_BYTEARRAYOBJECT_H
#define Py_BYTEARRAYOBJECT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdarg.h>
/* Type PyByteArrayObject represents a mutable array of bytes.
* The Python API is that of a sequence;
* the bytes are mapped to ints in [0, 256).
* Bytes are not characters; they may be used to encode characters.
* The only way to go between bytes and str/unicode is via encoding
* and decoding.
* For the convenience of C programmers, the bytes type is considered
* to contain a char pointer, not an unsigned char pointer.
*/
/* Object layout */
typedef struct {
PyObject_VAR_HEAD
/* XXX(nnorwitz): should ob_exports be Py_ssize_t? */
int ob_exports; /* how many buffer exports */
Py_ssize_t ob_alloc; /* How many bytes allocated */
/* Byte storage; PyByteArray_AS_STRING hands out this pointer whenever
   Py_SIZE(self) is non-zero (and _PyByteArray_empty_string otherwise). */
char *ob_bytes;
} PyByteArrayObject;
/* Type object */
PyAPI_DATA(PyTypeObject) PyByteArray_Type;
PyAPI_DATA(PyTypeObject) PyByteArrayIter_Type;
/* Type check macros */
#define PyByteArray_Check(self) PyObject_TypeCheck(self, &PyByteArray_Type)
#define PyByteArray_CheckExact(self) (Py_TYPE(self) == &PyByteArray_Type)
/* Direct API functions */
PyAPI_FUNC(PyObject *) PyByteArray_FromObject(PyObject *) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyByteArray_Concat(PyObject *, PyObject *) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyByteArray_FromStringAndSize(const char *, Py_ssize_t) PYSTON_NOEXCEPT;
PyAPI_FUNC(Py_ssize_t) PyByteArray_Size(PyObject *) PYSTON_NOEXCEPT;
PyAPI_FUNC(char *) PyByteArray_AsString(PyObject *) PYSTON_NOEXCEPT;
PyAPI_FUNC(int) PyByteArray_Resize(PyObject *, Py_ssize_t) PYSTON_NOEXCEPT;
/* Macros, trading safety for speed */
#define PyByteArray_AS_STRING(self) \
(assert(PyByteArray_Check(self)), \
Py_SIZE(self) ? ((PyByteArrayObject *)(self))->ob_bytes : _PyByteArray_empty_string)
#define PyByteArray_GET_SIZE(self) (assert(PyByteArray_Check(self)),Py_SIZE(self))
PyAPI_DATA(char) _PyByteArray_empty_string[];
#ifdef __cplusplus
}
#endif
#endif /* !Py_BYTEARRAYOBJECT_H */
// This file is originally from CPython 2.7, with modifications for Pyston
#ifndef Py_BYTES_CTYPE_H
#define Py_BYTES_CTYPE_H
/*
* The internal implementation behind PyString (bytes) and PyBytes (buffer)
* methods of the given names, they operate on ASCII byte strings.
*/
extern PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
extern PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
extern PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
extern PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
extern PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
extern PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
extern PyObject* _Py_bytes_istitle(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
/* These store their len sized answer in the given preallocated *result arg. */
extern void _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
extern void _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
extern void _Py_bytes_title(char *result, char *s, Py_ssize_t len) PYSTON_NOEXCEPT;
extern void _Py_bytes_capitalize(char *result, char *s, Py_ssize_t len) PYSTON_NOEXCEPT;
extern void _Py_bytes_swapcase(char *result, char *s, Py_ssize_t len) PYSTON_NOEXCEPT;
/* Shared __doc__ strings. */
extern const char _Py_isspace__doc__[];
extern const char _Py_isalpha__doc__[];
extern const char _Py_isalnum__doc__[];
extern const char _Py_isdigit__doc__[];
extern const char _Py_islower__doc__[];
extern const char _Py_isupper__doc__[];
extern const char _Py_istitle__doc__[];
extern const char _Py_lower__doc__[];
extern const char _Py_upper__doc__[];
extern const char _Py_title__doc__[];
extern const char _Py_capitalize__doc__[];
extern const char _Py_swapcase__doc__[];
/* These are left in for backward compatibility and will be removed
in 2.8/3.2 */
#define ISLOWER(c) Py_ISLOWER(c)
#define ISUPPER(c) Py_ISUPPER(c)
#define ISALPHA(c) Py_ISALPHA(c)
#define ISDIGIT(c) Py_ISDIGIT(c)
#define ISXDIGIT(c) Py_ISXDIGIT(c)
#define ISALNUM(c) Py_ISALNUM(c)
#define ISSPACE(c) Py_ISSPACE(c)
#undef islower
#define islower(c) undefined_islower(c)
#undef isupper
#define isupper(c) undefined_isupper(c)
#undef isalpha
#define isalpha(c) undefined_isalpha(c)
#undef isdigit
#define isdigit(c) undefined_isdigit(c)
#undef isxdigit
#define isxdigit(c) undefined_isxdigit(c)
#undef isalnum
#define isalnum(c) undefined_isalnum(c)
#undef isspace
#define isspace(c) undefined_isspace(c)
/* These are left in for backward compatibility and will be removed
in 2.8/3.2 */
#define TOLOWER(c) Py_TOLOWER(c)
#define TOUPPER(c) Py_TOUPPER(c)
#undef tolower
#define tolower(c) undefined_tolower(c)
#undef toupper
#define toupper(c) undefined_toupper(c)
/* this is needed because some docs are shared from the .o, not static */
#define PyDoc_STRVAR_shared(name,str) const char name[] = PyDoc_STR(str)
#endif /* !Py_BYTES_CTYPE_H */
......@@ -70,6 +70,9 @@ PyAPI_FUNC(int) PyList_Reverse(PyObject *) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyList_AsTuple(PyObject *) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) _PyList_Extend(PyListObject *, PyObject *) PYSTON_NOEXCEPT;
// Pyston addition:
PyAPI_FUNC(PyObject **) PyList_Items(PyObject *) PYSTON_NOEXCEPT;
/* Macro, trading safety for speed */
// Pyston changes: these aren't direct macros any more [they potentially could be though]
#define PyList_GET_ITEM(op, i) PyList_GetItem((PyObject*)(op), (i))
......
......@@ -99,6 +99,9 @@ PyAPI_FUNC(void) PyString_InternImmortal(PyObject **) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *) PYSTON_NOEXCEPT;
PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void) PYSTON_NOEXCEPT;
// Pyston addition:
PyAPI_FUNC(char) PyString_GetItem(PyObject *, Py_ssize_t) PYSTON_NOEXCEPT;
/* Use only if you know it's a string */
#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate)
......@@ -193,7 +196,7 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
register Py_ssize_t *len /* pointer to length variable or NULL
(only possible for 0-terminated
strings) */
);
) PYSTON_NOEXCEPT;
/* Using the current locale, insert the thousands grouping
......
......@@ -59,6 +59,9 @@ PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...) PYSTON_NOEXCEPT;
PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *) PYSTON_NOEXCEPT;
// Pyston addition:
PyAPI_FUNC(PyObject **) PyTuple_Items(PyObject *) PYSTON_NOEXCEPT;
/* Macro, trading safety for speed */
// Pyston changes: these aren't direct macros any more [they potentially could be though]
#define PyTuple_GET_ITEM(op, i) PyTuple_GetItem(op, i)
......
// This file is originally from CPython 2.7, with modifications for Pyston
/* Unicode name database interface */
#ifndef Py_UCNHASH_H
#define Py_UCNHASH_H
#ifdef __cplusplus
extern "C" {
#endif
/* revised ucnhash CAPI interface (exported through a "wrapper") */
#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI"
typedef struct {
/* Size of this struct */
int size;
/* Get name for a given character code. Returns non-zero if
success, zero if not. Does not set Python exceptions.
If self is NULL, data come from the default version of the database.
If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen);
/* Get character code for a given name. Same error handling
as for getname. */
int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code);
} _PyUnicode_Name_CAPI;
#ifdef __cplusplus
}
#endif
#endif /* !Py_UCNHASH_H */
......@@ -414,8 +414,6 @@ extern "C" {
/* --- Unicode Type ------------------------------------------------------- */
// Pyston change: this is not our object format
#if 0
typedef struct {
PyObject_HEAD
Py_ssize_t length; /* Length of raw Unicode data in buffer */
......@@ -425,14 +423,8 @@ typedef struct {
string, or NULL; this is used for
implementing the buffer protocol */
} PyUnicodeObject;
#endif
struct _PyUnicodeObject;
typedef struct _PyUnicodeObject PyUnicodeObject;
// Pyston change: this is no longer a static object
PyAPI_DATA(PyTypeObject*) unicode_cls;
#define PyUnicode_Type (*unicode_cls)
//PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
// Pyston changes: these aren't direct macros any more [they potentially could be though]
PyAPI_FUNC(bool) _PyUnicode_Check(PyObject*) PYSTON_NOEXCEPT;
......@@ -443,8 +435,6 @@ PyAPI_FUNC(bool) _PyUnicode_Check(PyObject*) PYSTON_NOEXCEPT;
#endif
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
// Pyston changes: these aren't direct macros any more [they potentially could be though]
#if 0
/* Fast access macros */
#define PyUnicode_GET_SIZE(op) \
(((PyUnicodeObject *)(op))->length)
......@@ -454,11 +444,6 @@ PyAPI_FUNC(bool) _PyUnicode_Check(PyObject*) PYSTON_NOEXCEPT;
(((PyUnicodeObject *)(op))->str)
#define PyUnicode_AS_DATA(op) \
((const char *)((PyUnicodeObject *)(op))->str)
#endif
PyAPI_FUNC(Py_ssize_t) PyUnicode_GET_SIZE(PyObject*) PYSTON_NOEXCEPT;
PyAPI_FUNC(Py_ssize_t) PyUnicode_GET_DATA_SIZE(PyObject*) PYSTON_NOEXCEPT;
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AS_UNICODE(PyObject*) PYSTON_NOEXCEPT;
PyAPI_FUNC(const char *) PyUnicode_AS_DATA(PyObject*) PYSTON_NOEXCEPT;
/* --- Constants ---------------------------------------------------------- */
......
This diff is collapsed.
This diff is collapsed.
......@@ -1025,8 +1025,7 @@ done:
/************************************************************************/
/* this is the main entry point */
// Pyston change: changed to non-static
/* static */ PyObject *
static PyObject *
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
{
SubString input;
......
......@@ -7,3 +7,12 @@
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
#include "stringlib/localeutil.h"
// do_string_format has to stay a static function, since it is used by both stringobject.c
// and unicodeobject.c. We still want to reach it from str.cpp, though, so expose this small
// non-static forwarding function.
// An alternative would have been to modify string_format.h so that it can choose whether
// to mark the function as static or not.
PyObject * _do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) {
return do_string_format(self, args, kwargs);
}
// This file is originally from CPython 2.7, with modifications for Pyston
/*
Unicode character type helpers.
Written by Marc-Andre Lemburg (mal@lemburg.com).
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
Copyright (c) Corporation for National Research Initiatives.
*/
#include "Python.h"
#include "unicodeobject.h"
#define ALPHA_MASK 0x01
#define DECIMAL_MASK 0x02
#define DIGIT_MASK 0x04
#define LOWER_MASK 0x08
#define LINEBREAK_MASK 0x10
#define SPACE_MASK 0x20
#define TITLE_MASK 0x40
#define UPPER_MASK 0x80
#define NODELTA_MASK 0x100
#define NUMERIC_MASK 0x200
/* One entry of the character type table; gettyperecord() maps a code point
   to one of these records. */
typedef struct {
/* Case mappings: when NODELTA_MASK is set in flags the field is returned
   as the mapped character itself; otherwise it is added to the input
   character as an offset (values >= 32768 are treated as negative). */
const Py_UNICODE upper;
const Py_UNICODE lower;
const Py_UNICODE title;
/* Value reported by _PyUnicode_ToDecimalDigit when DECIMAL_MASK is set. */
const unsigned char decimal;
/* Value reported by _PyUnicode_ToDigit when DIGIT_MASK is set. */
const unsigned char digit;
/* Bitwise OR of the *_MASK constants defined above. */
const unsigned short flags;
} _PyUnicode_TypeRecord;
#include "unicodetype_db.h"
/* Look up the type record for `code` through the two-level index1/index2
   tables.  On wide builds, code points >= 0x110000 map to record 0. */
static const _PyUnicode_TypeRecord *
gettyperecord(Py_UNICODE code)
{
    int slot = 0;

#ifdef Py_UNICODE_WIDE
    if (code < 0x110000)
#endif
    {
        /* First level selects a page, second level the record within it. */
        const int page = index1[(code >> SHIFT)];
        slot = index2[(page << SHIFT) + (code & ((1 << SHIFT) - 1))];
    }
    return &_PyUnicode_TypeRecords[slot];
}
/* Map ch to its titlecase form.  The record's `title` field is either the
   result itself (NODELTA_MASK set) or an offset to add to ch; with no
   known mapping the result is ch. */
Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
{
    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);
    int offset = rec->title;

    if ((rec->flags & NODELTA_MASK) == 0) {
        /* Offsets are stored unsigned; fold the upper half back to negative. */
        if (offset >= 32768)
            offset -= 65536;
        return ch + offset;
    }
    return offset;
}
/* Titlecase test (category 'Lt'): 1 when TITLE_MASK is set in the
   character's type record, 0 otherwise. */
int _PyUnicode_IsTitlecase(Py_UNICODE ch)
{
    return (gettyperecord(ch)->flags & TITLE_MASK) ? 1 : 0;
}
/* Decimal value (0-9) of ch when its record carries DECIMAL_MASK,
   -1 for every other character. */
int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
{
    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);

    if (!(rec->flags & DECIMAL_MASK))
        return -1;
    return rec->decimal;
}
/* 1 when _PyUnicode_ToDecimalDigit yields a value for ch, 0 otherwise. */
int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
{
    return _PyUnicode_ToDecimalDigit(ch) >= 0;
}
/* Digit value (0-9) of ch when its record carries DIGIT_MASK,
   -1 for every other character. */
int _PyUnicode_ToDigit(Py_UNICODE ch)
{
    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);

    if (!(rec->flags & DIGIT_MASK))
        return -1;
    return rec->digit;
}
/* 1 when _PyUnicode_ToDigit yields a value for ch, 0 otherwise. */
int _PyUnicode_IsDigit(Py_UNICODE ch)
{
    return _PyUnicode_ToDigit(ch) >= 0;
}
/* Returns 1 for Unicode characters whose type record has NUMERIC_MASK
   set, 0 otherwise.  (This is only the predicate; it does not produce
   the numeric value itself.) */
int _PyUnicode_IsNumeric(Py_UNICODE ch)
{
    return (gettyperecord(ch)->flags & NUMERIC_MASK) ? 1 : 0;
}
#ifndef WANT_WCTYPE_FUNCTIONS
/* Lowercase test (category 'Ll'): 1 when LOWER_MASK is set in the
   character's type record, 0 otherwise. */
int _PyUnicode_IsLowercase(Py_UNICODE ch)
{
    return (gettyperecord(ch)->flags & LOWER_MASK) ? 1 : 0;
}
/* Uppercase test (category 'Lu'): 1 when UPPER_MASK is set in the
   character's type record, 0 otherwise. */
int _PyUnicode_IsUppercase(Py_UNICODE ch)
{
    return (gettyperecord(ch)->flags & UPPER_MASK) ? 1 : 0;
}
/* Map ch to its uppercase form.  The record's `upper` field is either the
   result itself (NODELTA_MASK set) or an offset to add to ch; with no
   known mapping the result is ch. */
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
{
    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);
    int offset = rec->upper;

    if ((rec->flags & NODELTA_MASK) == 0) {
        /* Offsets are stored unsigned; fold the upper half back to negative. */
        if (offset >= 32768)
            offset -= 65536;
        return ch + offset;
    }
    return offset;
}
/* Map ch to its lowercase form.  The record's `lower` field is either the
   result itself (NODELTA_MASK set) or an offset to add to ch; with no
   known mapping the result is ch. */
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
{
    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);
    int offset = rec->lower;

    if ((rec->flags & NODELTA_MASK) == 0) {
        /* Offsets are stored unsigned; fold the upper half back to negative. */
        if (offset >= 32768)
            offset -= 65536;
        return ch + offset;
    }
    return offset;
}
/* Alphabetic test (categories 'Ll', 'Lu', 'Lt', 'Lo' or 'Lm'): 1 when
   ALPHA_MASK is set in the character's type record, 0 otherwise. */
int _PyUnicode_IsAlpha(Py_UNICODE ch)
{
    return (gettyperecord(ch)->flags & ALPHA_MASK) ? 1 : 0;
}
#else
/* Export the interfaces using the wchar_t type for portability
reasons: */
/* This branch is compiled when WANT_WCTYPE_FUNCTIONS is defined: each
   function forwards straight to the matching isw*()/tow*() routine instead
   of consulting the type-record table. */
/* Forwards to iswlower(). */
int _PyUnicode_IsLowercase(Py_UNICODE ch)
{
return iswlower(ch);
}
/* Forwards to iswupper(). */
int _PyUnicode_IsUppercase(Py_UNICODE ch)
{
return iswupper(ch);
}
/* Forwards to towlower(). */
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
{
return towlower(ch);
}
/* Forwards to towupper(). */
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
{
return towupper(ch);
}
/* Forwards to iswalpha(). */
int _PyUnicode_IsAlpha(Py_UNICODE ch)
{
return iswalpha(ch);
}
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -27,6 +27,14 @@
namespace pyston {
extern "C" Py_ssize_t _PyObject_LengthHint(PyObject* o, Py_ssize_t defaultvalue) noexcept {
Py_FatalError("unimplemented");
}
extern "C" int PyBuffer_ToContiguous(void* buf, Py_buffer* view, Py_ssize_t len, char fort) noexcept {
Py_FatalError("unimplemented");
}
static PyObject* type_error(const char* msg, PyObject* obj) noexcept {
PyErr_Format(PyExc_TypeError, msg, Py_TYPE(obj)->tp_name);
return NULL;
......@@ -469,7 +477,20 @@ extern "C" int PyObject_IsSubclass(PyObject* derived, PyObject* cls) noexcept {
}
// Calls `callable` with arguments described by a build-value style `format` string and the
// matching varargs (Py_ssize_t flavour, built via _Py_VaBuildValue_SizeT).
// - Returns null_error() when `callable` is NULL.
// - With a NULL or empty `format`, an empty tuple is used as the argument object.
// The built argument object is handed to call_function_tail(), whose result is returned.
extern "C" PyObject* _PyObject_CallFunction_SizeT(PyObject* callable, const char* format, ...) noexcept {
    if (callable == NULL)
        return null_error();

    PyObject* args;
    if (format && *format) {
        va_list va;
        va_start(va, format);
        args = _Py_VaBuildValue_SizeT(format, va);
        va_end(va);
    } else {
        args = PyTuple_New(0);
    }

    return call_function_tail(callable, args);
}
#define NEW_STYLE_NUMBER(o) PyType_HasFeature((o)->cls, Py_TPFLAGS_CHECKTYPES)
......
......@@ -69,6 +69,18 @@ static int countformat(const char* format, int endchar) noexcept {
return count;
}
#ifdef Py_USING_UNICODE
// Counts the Py_UNICODE units in `u` up to (not including) the first zero unit.
static int _ustrlen(Py_UNICODE* u) {
    int len = 0;
    for (Py_UNICODE* p = u; *p != 0; ++p)
        ++len;
    return len;
}
#endif
static PyObject* do_mktuple(const char**, va_list*, int, int, int) noexcept;
// static PyObject *do_mklist(const char**, va_list *, int, int, int) noexcept;
// static PyObject *do_mkdict(const char**, va_list *, int, int, int) noexcept;
......@@ -162,7 +174,30 @@ static PyObject* do_mkvalue(const char** p_format, va_list* p_va, int flags) noe
}
return v;
}
#ifdef Py_USING_UNICODE
case 'u': {
PyObject* v;
Py_UNICODE* u = va_arg(*p_va, Py_UNICODE*);
Py_ssize_t n;
if (**p_format == '#') {
++*p_format;
if (flags & FLAG_SIZE_T)
n = va_arg(*p_va, Py_ssize_t);
else
n = va_arg(*p_va, int);
} else
n = -1;
if (u == NULL) {
v = Py_None;
Py_INCREF(v);
} else {
if (n < 0)
n = _ustrlen(u);
v = PyUnicode_FromUnicode(u, n);
}
return v;
}
#endif
default:
RELEASE_ASSERT(0, "%c", *((*p_format) - 1));
}
......
......@@ -26,8 +26,83 @@
namespace pyston {
extern "C" {
_Py_HashSecret_t _Py_HashSecret;
}
// Produces a unicode representation of `v` (port of CPython 2.7's PyObject_Unicode).
//
// Order of attempts:
//   1. v == NULL             -> the text "<NULL>", decoded to unicode.
//   2. exact unicode object  -> returned as-is (with a new reference).
//   3. a __unicode__ method  -> looked up on the instance for classic-class instances,
//                               via _PyObject_LookupSpecial otherwise; its result is used.
//   4. no __unicode__        -> unicode subclasses are copied into a true unicode object,
//                               exact str objects are used directly, anything else goes
//                               through tp_str or, failing that, PyObject_Repr.
// A non-unicode intermediate result is finally decoded with PyUnicode_FromEncodedObject
// ("strict" errors).  Returns NULL when any step fails.
extern "C" PyObject* PyObject_Unicode(PyObject* v) noexcept {
    PyObject* res;
    PyObject* func;
    PyObject* str;
    int unicode_method_found = 0;
    static PyObject* unicodestr = NULL;

    if (v == NULL) {
        res = PyString_FromString("<NULL>");
        if (res == NULL)
            return NULL;
        str = PyUnicode_FromEncodedObject(res, NULL, "strict");
        Py_DECREF(res);
        return str;
    } else if (PyUnicode_CheckExact(v)) {
        Py_INCREF(v);
        return v;
    }

    if (PyInstance_Check(v)) {
        /* We're an instance of a classic class */
        /* Try __unicode__ from the instance -- alas we have no type */
        if (!unicodestr) {
            unicodestr = boxStrConstant("__unicode__");
            // Check for failure before handing the pointer to the GC as a root.
            if (!unicodestr)
                return NULL;
            gc::registerPermanentRoot(unicodestr);
        }
        func = PyObject_GetAttr(v, unicodestr);
        if (func != NULL) {
            unicode_method_found = 1;
            res = PyObject_CallFunctionObjArgs(func, NULL);
            Py_DECREF(func);
        } else {
            // A missing attribute is not an error here; fall through to the defaults below.
            PyErr_Clear();
        }
    } else {
        /* Not a classic class instance, try __unicode__. */
        func = _PyObject_LookupSpecial(v, "__unicode__", &unicodestr);
        if (func != NULL) {
            unicode_method_found = 1;
            res = PyObject_CallFunctionObjArgs(func, NULL);
            Py_DECREF(func);
        } else if (PyErr_Occurred())
            return NULL;
    }

    /* Didn't find __unicode__ */
    if (!unicode_method_found) {
        if (PyUnicode_Check(v)) {
            /* For a Unicode subtype that didn't overwrite __unicode__,
               return a true Unicode object with the same data. */
            return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v), PyUnicode_GET_SIZE(v));
        }
        if (PyString_CheckExact(v)) {
            Py_INCREF(v);
            res = v;
        } else {
            if (Py_TYPE(v)->tp_str != NULL)
                res = (*Py_TYPE(v)->tp_str)(v);
            else
                res = PyObject_Repr(v);
        }
    }

    if (res == NULL)
        return NULL;
    if (!PyUnicode_Check(res)) {
        // Convert str (or other encoded) results into a real unicode object.
        str = PyUnicode_FromEncodedObject(res, NULL, "strict");
        Py_DECREF(res);
        res = str;
    }
    return res;
}
extern "C" PyObject* _PyObject_Str(PyObject* v) noexcept {
......
......@@ -33,6 +33,7 @@
#include "core/stats.h"
#include "core/thread_utils.h"
#include "core/util.h"
#include "runtime/capi.h"
#include "runtime/generator.h"
#include "runtime/import.h"
#include "runtime/inline/boxing.h"
......@@ -497,7 +498,9 @@ Value ASTInterpreter::visit_langPrimitive(AST_LangPrimitive* node) {
assert(node->args[1]->type == AST_TYPE::Str);
Value module = visit_expr(node->args[0]);
const std::string& name = ast_cast<AST_Str>(node->args[1])->s;
auto ast_str = ast_cast<AST_Str>(node->args[1]);
assert(ast_str->str_type == AST_Str::STR);
const std::string& name = ast_str->str_data;
assert(name.size());
v = importFrom(module.o, &name);
} else if (node->opcode == AST_LangPrimitive::IMPORT_NAME) {
......@@ -508,7 +511,9 @@ Value ASTInterpreter::visit_langPrimitive(AST_LangPrimitive* node) {
int level = static_cast<AST_Num*>(node->args[0])->n_int;
Value froms = visit_expr(node->args[1]);
const std::string& module_name = static_cast<AST_Str*>(node->args[2])->s;
auto ast_str = ast_cast<AST_Str>(node->args[2]);
assert(ast_str->str_type == AST_Str::STR);
const std::string& module_name = ast_str->str_data;
v = import(level, froms.o, &module_name);
} else if (node->opcode == AST_LangPrimitive::IMPORT_STAR) {
assert(node->args.size() == 1);
......@@ -996,7 +1001,13 @@ Value ASTInterpreter::visit_set(AST_Set* node) {
}
// Evaluates a string literal node.  STR literals are boxed from their raw bytes; UNICODE
// literals carry UTF-8 in str_data and are decoded via decodeUTF8StringPtr.  Any other
// str_type trips a RELEASE_ASSERT.
Value ASTInterpreter::visit_str(AST_Str* node) {
    if (node->str_type == AST_Str::STR) {
        return boxString(node->str_data);
    } else if (node->str_type == AST_Str::UNICODE) {
        return decodeUTF8StringPtr(&node->str_data);
    } else {
        RELEASE_ASSERT(0, "%d", node->str_type);
    }
}
Value ASTInterpreter::visit_name(AST_Name* node) {
......
......@@ -1798,6 +1798,12 @@ CompilerVariable* makeStr(const std::string* s) {
return new ValuedCompilerVariable<const std::string*>(STR_CONSTANT, s, true);
}
// Emits a call to decodeUTF8StringPtr on the embedded constant string pointer `s` and wraps
// the resulting value as a variable typed from unicode_cls.
CompilerVariable* makeUnicode(IREmitter& emitter, const std::string* s) {
    llvm::Value* utf8_ptr = embedConstantPtr(s, g.llvm_str_type_ptr);
    llvm::Value* decoded = emitter.getBuilder()->CreateCall(g.funcs.decodeUTF8StringPtr, utf8_ptr);
    return new ConcreteCompilerVariable(typeFromClass(unicode_cls), decoded, true);
}
class VoidType : public ConcreteCompilerType {
public:
llvm::Type* llvmType() override { return g.void_; }
......
......@@ -379,6 +379,7 @@ ConcreteCompilerVariable* makeBool(bool);
ConcreteCompilerVariable* makeLong(IREmitter& emitter, std::string&);
ConcreteCompilerVariable* makePureImaginary(IREmitter& emitter, double imag);
CompilerVariable* makeStr(const std::string*);
CompilerVariable* makeUnicode(IREmitter& emitter, const std::string*);
CompilerVariable* makeFunction(IREmitter& emitter, CLFunction*, CompilerVariable* closure, bool isGenerator,
const std::vector<ConcreteCompilerVariable*>& defaults);
ConcreteCompilerVariable* undefVariable();
......
......@@ -515,7 +515,9 @@ private:
ConcreteCompilerVariable* converted_module = module->makeConverted(emitter, module->getBoxType());
module->decvref(emitter);
const std::string& name = ast_cast<AST_Str>(node->args[1])->s;
auto ast_str = ast_cast<AST_Str>(node->args[1]);
assert(ast_str->str_type == AST_Str::STR);
const std::string& name = ast_str->str_data;
assert(name.size());
llvm::Value* r = emitter.createCall2(unw_info, g.funcs.importFrom, converted_module->getValue(),
......@@ -558,7 +560,9 @@ private:
ConcreteCompilerVariable* converted_froms = froms->makeConverted(emitter, froms->getBoxType());
froms->decvref(emitter);
const std::string& module_name = static_cast<AST_Str*>(node->args[2])->s;
auto ast_str = ast_cast<AST_Str>(node->args[2]);
assert(ast_str->str_type == AST_Str::STR);
const std::string& module_name = ast_str->str_data;
llvm::Value* imported = emitter.createCall3(unw_info, g.funcs.import, getConstantInt(level, g.i32),
converted_froms->getValue(),
......@@ -1008,7 +1012,15 @@ private:
return new ConcreteCompilerVariable(SLICE, rtn, true);
}
// Compiles a string literal node: STR literals become string constants (makeStr), UNICODE
// literals go through makeUnicode with their UTF-8 payload.  Any other str_type trips a
// RELEASE_ASSERT.
CompilerVariable* evalStr(AST_Str* node, UnwindInfo unw_info) {
    if (node->str_type == AST_Str::STR) {
        return makeStr(&node->str_data);
    } else if (node->str_type == AST_Str::UNICODE) {
        return makeUnicode(emitter, &node->str_data);
    } else {
        RELEASE_ASSERT(0, "%d", node->str_type);
    }
}
CompilerVariable* evalSubscript(AST_Subscript* node, UnwindInfo unw_info) {
CompilerVariable* value = evalExpr(node->value, unw_info);
......
......@@ -154,7 +154,7 @@ def convert(n, f):
elif isinstance(v, str):
_print_str(v, f)
elif isinstance(v, unicode):
_print_str(v.encode("ascii"), f)
_print_str(v.encode("utf8"), f)
elif isinstance(v, bool):
f.write(struct.pack("B", v))
elif isinstance(v, int):
......
......@@ -661,12 +661,9 @@ AST_Str* read_str(BufferedReader* reader) {
rtn->lineno = reader->readULL();
if (rtn->str_type == AST_Str::STR) {
rtn->s = readString(reader);
rtn->str_data = readString(reader);
} else if (rtn->str_type == AST_Str::UNICODE) {
// Don't really support unicode for now...
printf("Warning: converting unicode literal to str\n");
rtn->str_type = AST_Str::STR;
rtn->s = readString(reader);
rtn->str_data = readString(reader);
} else {
RELEASE_ASSERT(0, "%d", rtn->str_type);
}
......
......@@ -504,7 +504,8 @@ struct expr_dispatcher {
// Converts a pypa string node into an AST_Str, copying its source location.  The node is
// tagged as a plain STR literal and its value is stored in str_data.
ResultPtr read(pypa::AstStr& s) {
    AST_Str* ptr = new AST_Str();
    location(ptr, s);
    ptr->str_type = AST_Str::STR;
    ptr->str_data = s.value;
    return ptr;
}
......@@ -792,7 +793,7 @@ struct stmt_dispatcher {
AST_Str* str = new AST_Str();
ptr->value = str;
str->str_type = AST_Str::STR;
str->s = d.doc;
str->str_data = d.doc;
return ptr;
}
};
......
......@@ -184,6 +184,7 @@ void initGlobalFuncs(GlobalState& g) {
GET(createLong);
GET(createPureImaginary);
GET(createSet);
GET(decodeUTF8StringPtr);
GET(getattr);
GET(setattr);
......
......@@ -34,7 +34,8 @@ struct GlobalFuncs {
llvm::Value* boxInt, *unboxInt, *boxFloat, *unboxFloat, *boxStringPtr, *boxCLFunction, *unboxCLFunction,
*boxInstanceMethod, *boxBool, *unboxBool, *createTuple, *createDict, *createList, *createSlice,
*createUserClass, *createClosure, *createGenerator, *createLong, *createSet, *createPureImaginary;
*createUserClass, *createClosure, *createGenerator, *createLong, *createSet, *createPureImaginary,
*decodeUTF8StringPtr;
llvm::Value* getattr, *setattr, *delattr, *delitem, *delGlobal, *nonzero, *binop, *compare, *augbinop, *unboxedLen,
*getitem, *getclsattr, *getGlobal, *setitem, *unaryop, *import, *importFrom, *importStar, *repr, *str,
*isinstance, *yield, *getPystonIter;
......
......@@ -1627,7 +1627,13 @@ bool PrintVisitor::visit_slice(AST_Slice* node) {
}
// Prints a string literal node: STR literals are shown quoted, UNICODE literals as the
// placeholder "<unicode value>".  Any other str_type trips a RELEASE_ASSERT.
// Always returns false.
bool PrintVisitor::visit_str(AST_Str* node) {
    if (node->str_type == AST_Str::STR) {
        printf("\"%s\"", node->str_data.c_str());
    } else if (node->str_type == AST_Str::UNICODE) {
        printf("<unicode value>");
    } else {
        RELEASE_ASSERT(0, "%d", node->str_type);
    }
    return false;
}
......
......@@ -816,18 +816,21 @@ public:
// AST node for a string literal (byte string or unicode).
class AST_Str : public AST_expr {
public:
    // Kind of literal.  UNSET marks a node whose kind has not been filled in yet (the
    // default constructor leaves it in this state).
    enum StrType {
        UNSET = 0x00,
        STR = 0x10,
        UNICODE = 0x20,
    } str_type;

    // The meaning of str_data depends on str_type.  For STR, it's just the bytes value.
    // For UNICODE, it's the utf-8 encoded value.
    std::string str_data;

    virtual void accept(ASTVisitor* v);
    virtual void* accept_expr(ExprVisitor* v);

    AST_Str() : AST_expr(AST_TYPE::Str), str_type(UNSET) {}
    // Both string-taking constructors create a STR (byte string) literal.
    AST_Str(const std::string& s) : AST_expr(AST_TYPE::Str), str_type(STR), str_data(s) {}
    // Takes a non-const rvalue so that std::move really moves the buffer; const rvalues
    // bind to the copying overload above.
    AST_Str(std::string&& s) : AST_expr(AST_TYPE::Str), str_type(STR), str_data(std::move(s)) {}

    static const AST_TYPE::AST_TYPE TYPE = AST_TYPE::Str;
};
......
......@@ -594,7 +594,7 @@ private:
AST_Str* orig = ast_cast<AST_Str>(val);
AST_Str* made = new AST_Str();
made->str_type = orig->str_type;
made->s = orig->s;
made->str_data = orig->str_data;
made->col_offset = orig->col_offset;
made->lineno = orig->lineno;
return made;
......
......@@ -347,16 +347,40 @@ extern "C" Box* chr(Box* arg) {
return boxString(std::string(1, (char)n));
}
// Builtin ord(): returns the integer value of a one-element str, bytearray or (when
// Py_USING_UNICODE is defined) unicode object as a new BoxedInt.
// Raises TypeError when `obj` is of none of these types, or when its length is not 1.
extern "C" Box* ord(Box* obj) {
    long value;
    Py_ssize_t size;

    if (PyString_Check(obj)) {
        size = PyString_GET_SIZE(obj);
        if (size == 1) {
            // Go through unsigned char so that bytes >= 0x80 do not come out negative.
            value = (long)((unsigned char)*PyString_AS_STRING(obj));
            return new BoxedInt(value);
        }
    } else if (PyByteArray_Check(obj)) {
        size = PyByteArray_GET_SIZE(obj);
        if (size == 1) {
            value = (long)((unsigned char)*PyByteArray_AS_STRING(obj));
            return new BoxedInt(value);
        }
#ifdef Py_USING_UNICODE
    } else if (PyUnicode_Check(obj)) {
        size = PyUnicode_GET_SIZE(obj);
        if (size == 1) {
            value = (long)*PyUnicode_AS_UNICODE(obj);
            return new BoxedInt(value);
        }
#endif
    } else {
        raiseExcHelper(TypeError, "ord() expected string of length 1, but "
                                  "%.200s found",
                       obj->cls->tp_name);
    }

    // Reached only for a supported type whose length is not 1.
    raiseExcHelper(TypeError, "ord() expected a character, "
                              "but string of length %zd found",
                   size);
}
Box* range(Box* start, Box* stop, Box* step) {
......@@ -1015,6 +1039,8 @@ void setupBuiltins() {
boxRTFunction((void*)vars, UNKNOWN, 1, 1, false, false), "vars", { NULL }));
builtins_module->giveAttr("object", object_cls);
builtins_module->giveAttr("str", str_cls);
assert(unicode_cls);
builtins_module->giveAttr("unicode", unicode_cls);
builtins_module->giveAttr("basestring", basestring_cls);
// builtins_module->giveAttr("unicode", unicode_cls);
builtins_module->giveAttr("int", int_cls);
......
......@@ -33,6 +33,11 @@ namespace pyston {
BoxedModule* sys_module;
BoxedDict* sys_modules_dict;
extern "C" {
// supposed to be exposed through sys.flags
int Py_BytesWarningFlag = 0;
}
Box* sysExcInfo() {
ExcInfo* exc = getFrameExcInfo();
assert(exc->type);
......
......@@ -154,11 +154,6 @@ extern "C" PyVarObject* PyObject_InitVar(PyVarObject* op, PyTypeObject* tp, Py_s
return op;
}
extern "C" void PyObject_Free(void* p) noexcept {
gc::gc_free(p);
ASSERT(0, "I think this is good enough but I'm not sure; should test");
}
extern "C" PyObject* PyObject_Format(PyObject* obj, PyObject* format_spec) noexcept {
PyObject* empty = NULL;
PyObject* result = NULL;
......@@ -883,6 +878,18 @@ extern "C" PyObject* PyCallIter_New(PyObject* callable, PyObject* sentinel) noex
Py_FatalError("unimplemented");
}
// C-API allocation entry point: forwards the request for `sz` bytes to gc_compat_malloc
// and returns whatever pointer that helper produces.
extern "C" void* PyObject_Malloc(size_t sz) noexcept {
return gc_compat_malloc(sz);
}
// C-API reallocation entry point: forwards `ptr` and the new size `sz` to gc_compat_realloc
// and returns its result.
extern "C" void* PyObject_Realloc(void* ptr, size_t sz) noexcept {
return gc_compat_realloc(ptr, sz);
}
// C-API deallocation entry point: hands `ptr` to gc_compat_free.
// NOTE(review): presumably `ptr` must come from PyObject_Malloc/PyObject_Realloc -- confirm.
extern "C" void PyObject_Free(void* ptr) noexcept {
gc_compat_free(ptr);
}
// C-API raw-memory allocation entry point: forwards to gc_compat_malloc, the same helper
// that backs PyObject_Malloc.
extern "C" void* PyMem_Malloc(size_t sz) noexcept {
return gc_compat_malloc(sz);
}
......@@ -1165,22 +1172,6 @@ extern "C" Py_ssize_t PyNumber_AsSsize_t(PyObject* o, PyObject* exc) noexcept {
return n;
}
extern "C" Py_ssize_t PyUnicode_GET_SIZE(PyObject*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" Py_UNICODE* PyUnicode_AS_UNICODE(PyObject*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" const char* PyUnicode_AS_DATA(PyObject*) noexcept {
Py_FatalError("unimplemented");
}
// Placeholder for the C-API buffer contiguity query: not implemented yet, so any call
// ends in Py_FatalError("unimplemented"). Both arguments are ignored.
extern "C" int PyBuffer_IsContiguous(Py_buffer* view, char fort) noexcept {
Py_FatalError("unimplemented");
}
......@@ -1484,6 +1475,10 @@ extern "C" int _PyEval_SliceIndex(PyObject* v, Py_ssize_t* pi) noexcept {
return 1;
}
// Placeholder for the C-API PyBuffer_FromMemory: not implemented yet, so any call
// ends in Py_FatalError("unimplemented"). Both arguments are ignored.
extern "C" PyObject* PyBuffer_FromMemory(void* ptr, Py_ssize_t size) noexcept {
Py_FatalError("unimplemented");
}
BoxedModule* importTestExtension(const std::string& name) {
std::string pathname_name = "test/test_extension/" + name + ".pyston.so";
const char* pathname = pathname_name.c_str();
......
......@@ -65,6 +65,7 @@ void force() {
FORCE(createLong);
FORCE(createPureImaginary);
FORCE(createSet);
FORCE(decodeUTF8StringPtr);
FORCE(getattr);
FORCE(setattr);
......
......@@ -39,6 +39,16 @@ extern "C" int PyList_Append(PyObject* op, PyObject* newitem) noexcept {
return 0;
}
// Placeholder for the C-API PyList_Reverse: not implemented yet, so any call
// ends in Py_FatalError("unimplemented"). The list argument is ignored.
extern "C" int PyList_Reverse(PyObject* v) noexcept {
Py_FatalError("unimplemented");
}
// Returns a pointer to the first element slot of the list `op`.
// Aborts via RELEASE_ASSERT when `op` does not pass PyList_Check.
// NOTE(review): assumes the list's backing storage (`elts`) is non-null -- confirm what
// happens for an empty list.
extern "C" PyObject** PyList_Items(PyObject* op) noexcept {
    RELEASE_ASSERT(PyList_Check(op), "");
    BoxedList* list = static_cast<BoxedList*>(op);
    return &list->elts->elts[0];
}
extern "C" Box* listRepr(BoxedList* self) {
LOCK_REGION(self->lock.asRead());
......
......@@ -47,6 +47,11 @@ BoxedString::BoxedString(const std::string& s) : s(s) {
gc::registerGCManagedBytes(this->s.size());
}
// Returns the byte stored at position `n` of the string object `op`.
//
// Preconditions, both enforced with RELEASE_ASSERT:
//   - `op` passes PyString_Check;
//   - 0 <= n <= size.  n == size is accepted and yields '\0', mirroring code that
//     reads the terminator through a raw char buffer.
// Without the range check an out-of-range `n` would index past the std::string storage.
extern "C" char PyString_GetItem(PyObject* op, ssize_t n) noexcept {
    RELEASE_ASSERT(PyString_Check(op), "");
    const std::string& contents = static_cast<const BoxedString*>(op)->s;
    RELEASE_ASSERT(n >= 0 && static_cast<size_t>(n) <= contents.size(), "index %zd out of range", n);
    return contents[n];
}
extern "C" PyObject* PyString_FromFormatV(const char* format, va_list vargs) noexcept {
va_list count;
Py_ssize_t n = 0;
......@@ -1552,13 +1557,13 @@ Box* strPartition(BoxedString* self, BoxedString* sep) {
self->s.size() - found_idx - sep->s.size()) });
}
extern "C" PyObject* do_string_format(PyObject* self, PyObject* args, PyObject* kwargs);
extern "C" PyObject* _do_string_format(PyObject* self, PyObject* args, PyObject* kwargs);
Box* strFormat(BoxedString* self, BoxedTuple* args, BoxedDict* kwargs) {
assert(args->cls == tuple_cls);
assert(kwargs->cls == dict_cls);
Box* rtn = do_string_format(self, args, kwargs);
Box* rtn = _do_string_format(self, args, kwargs);
checkAndThrowCAPIException();
assert(rtn);
return rtn;
......@@ -2020,6 +2025,38 @@ extern "C" PyObject* PyString_FromString(const char* s) noexcept {
return boxStrConstant(s);
}
extern "C" int PyString_AsStringAndSize(register PyObject* obj, register char** s, register Py_ssize_t* len) noexcept {
if (s == NULL) {
PyErr_BadInternalCall();
return -1;
}
if (!PyString_Check(obj)) {
#ifdef Py_USING_UNICODE
if (PyUnicode_Check(obj)) {
obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
if (obj == NULL)
return -1;
} else
#endif
{
PyErr_Format(PyExc_TypeError, "expected string or Unicode object, "
"%.200s found",
Py_TYPE(obj)->tp_name);
return -1;
}
}
*s = PyString_AS_STRING(obj);
if (len != NULL)
*len = PyString_GET_SIZE(obj);
else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
PyErr_SetString(PyExc_TypeError, "expected string without null bytes");
return -1;
}
return 0;
}
BoxedString* createUninitializedString(ssize_t n) {
// I *think* this should avoid doing any copies, by using move constructors:
return new BoxedString(std::string(n, '\x00'));
......@@ -2052,9 +2089,15 @@ extern "C" char* PyString_AsString(PyObject* o) noexcept {
return getWriteableStringContents(s);
}
extern "C" Py_ssize_t PyString_Size(PyObject* s) noexcept {
RELEASE_ASSERT(s->cls == str_cls, "");
return static_cast<BoxedString*>(s)->s.size();
// Returns the length of the string-like object `op`, or -1 when
// PyString_AsStringAndSize rejects it.
extern "C" Py_ssize_t PyString_Size(PyObject* op) noexcept {
    // Anything that is not exactly a str goes through the generic conversion routine.
    if (op->cls != str_cls) {
        char* data;
        Py_ssize_t length;
        if (PyString_AsStringAndSize(op, &data, &length) != 0)
            return -1;
        return length;
    }

    // Exact str instances: read the size straight from the boxed std::string.
    return static_cast<BoxedString*>(op)->s.size();
}
extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) noexcept {
......
......@@ -68,6 +68,12 @@ Box* tupleGetitemInt(BoxedTuple* self, BoxedInt* slice) {
return tupleGetitemUnboxed(self, slice->n);
}
// Returns a pointer to the first element slot of the tuple `op`.
// Aborts via RELEASE_ASSERT when `op` does not pass PyTuple_Check.
// NOTE(review): indexes element 0 unconditionally -- confirm this is valid for an empty tuple.
extern "C" PyObject** PyTuple_Items(PyObject* op) noexcept {
    RELEASE_ASSERT(PyTuple_Check(op), "");
    BoxedTuple* tuple = static_cast<BoxedTuple*>(op);
    return &tuple->elts[0];
}
extern "C" PyObject* PyTuple_GetItem(PyObject* op, Py_ssize_t i) noexcept {
RELEASE_ASSERT(PyTuple_Check(op), "");
RELEASE_ASSERT(i >= 0, ""); // unlike tuple.__getitem__, PyTuple_GetItem doesn't do index wrapping
......
......@@ -64,6 +64,7 @@ extern "C" void initarray();
extern "C" void initzlib();
extern "C" void init_codecs();
extern "C" void init_socket();
extern "C" void _PyUnicode_Init();
namespace pyston {
......@@ -518,7 +519,7 @@ extern "C" void closureGCHandler(GCVisitor* v, Box* b) {
extern "C" {
BoxedClass* object_cls, *type_cls, *none_cls, *bool_cls, *int_cls, *float_cls,
* str_cls = NULL, *function_cls, *instancemethod_cls, *list_cls, *slice_cls, *module_cls, *dict_cls, *tuple_cls,
*file_cls, *member_cls, *closure_cls, *generator_cls, *complex_cls, *basestring_cls, *unicode_cls, *property_cls,
*file_cls, *member_cls, *closure_cls, *generator_cls, *complex_cls, *basestring_cls, *property_cls,
*staticmethod_cls, *classmethod_cls, *attrwrapper_cls, *pyston_getset_cls, *capi_getset_cls,
*builtin_function_or_method_cls;
......@@ -1171,6 +1172,13 @@ extern "C" PyObject* PyObject_Init(PyObject* op, PyTypeObject* tp) noexcept {
return op;
}
// Decodes the UTF-8 bytes held in `*s` with PyUnicode_DecodeUTF8 in "strict" error mode
// and returns the resulting object.
// checkAndThrowCAPIException() runs right after the decode (presumably turning a pending
// C-API error into a C++ exception -- confirm); past that point the result is asserted non-null.
Box* decodeUTF8StringPtr(const std::string* s) {
    const std::string& utf8 = *s;
    Box* decoded = PyUnicode_DecodeUTF8(utf8.c_str(), utf8.size(), "strict");
    checkAndThrowCAPIException();
    assert(decoded);
    return decoded;
}
bool TRACK_ALLOCATIONS = false;
void setupRuntime() {
root_hcls = HiddenClass::makeRoot();
......@@ -1217,8 +1225,6 @@ void setupRuntime() {
gc::enableGC();
unicode_cls = new BoxedHeapClass(basestring_cls, NULL, 0, sizeof(BoxedUnicode), false, "unicode");
// It wasn't safe to add __base__ attributes until object+type+str are set up, so do that now:
type_cls->giveAttr("__base__", object_cls);
basestring_cls->giveAttr("__base__", object_cls);
......@@ -1336,7 +1342,7 @@ void setupRuntime() {
setupIter();
setupClassobj();
setupSuper();
setupUnicode();
_PyUnicode_Init();
setupDescr();
setupTraceback();
......
......@@ -62,7 +62,6 @@ void teardownFile();
void setupCAPI();
void teardownCAPI();
void setupGenerator();
void setupUnicode();
void setupDescr();
void teardownDescr();
......@@ -81,9 +80,11 @@ extern "C" {
extern BoxedClass* object_cls, *type_cls, *bool_cls, *int_cls, *long_cls, *float_cls, *str_cls, *function_cls,
*none_cls, *instancemethod_cls, *list_cls, *slice_cls, *module_cls, *dict_cls, *tuple_cls, *file_cls,
*enumerate_cls, *xrange_cls, *member_cls, *method_cls, *closure_cls, *generator_cls, *complex_cls, *basestring_cls,
*unicode_cls, *property_cls, *staticmethod_cls, *classmethod_cls, *attrwrapper_cls, *pyston_getset_cls,
*capi_getset_cls, *builtin_function_or_method_cls;
*property_cls, *staticmethod_cls, *classmethod_cls, *attrwrapper_cls, *pyston_getset_cls, *capi_getset_cls,
*builtin_function_or_method_cls;
}
#define unicode_cls (&PyUnicode_Type)
extern "C" {
extern Box* None, *NotImplemented, *True, *False;
}
......@@ -107,6 +108,7 @@ Box* boxString(const std::string& s);
Box* boxString(std::string&& s);
extern "C" BoxedString* boxStrConstant(const char* chars);
extern "C" BoxedString* boxStrConstantSize(const char* chars, size_t n);
extern "C" Box* decodeUTF8StringPtr(const std::string* s);
// creates an uninitialized string of length n; useful for directly constructing into the string and avoiding copies:
BoxedString* createUninitializedString(ssize_t n);
......
This diff is collapsed.
# skip-if: '-x' in EXTRA_JIT_ARGS
# allow-warning: import level 0 will be treated as -1
# Construct unicode objects with no argument and from a str literal, and print their reprs.
print repr(unicode())
print repr(unicode('hello world'))
# Some random unicode character:
u = u'\u0180'
# Exercise len(), repr() and UTF-8 encoding on a unicode literal written with a \u escape.
print len(u)
print repr(u)
print repr(u.encode("utf8"))
# This is tricky, since we need to support file encodings, and then set stdout to UTF8:
# print u
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment