Commit 398ee469 authored by Kevin Modzelewski

Use CPython (2.7's) unicode implementation

parent f4b1b16a
......@@ -292,8 +292,8 @@ STDLIB_RELEASE_OBJS := stdlib.release.bc.o
ASM_SRCS := $(wildcard src/runtime/*.S)
STDMODULE_SRCS := errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c _codecsmodule.c socketmodule.c $(EXTRA_STDMODULE_SRCS)
STDOBJECT_SRCS := structseq.c capsule.c stringobject.c exceptions.c $(EXTRA_STDOBJECT_SRCS)
STDPYTHON_SRCS := pyctype.c getargs.c formatter_string.c pystrtod.c dtoa.c $(EXTRA_STDPYTHON_SRCS)
STDOBJECT_SRCS := structseq.c capsule.c stringobject.c exceptions.c unicodeobject.c unicodectype.c bytearrayobject.c bytes_methods.c $(EXTRA_STDOBJECT_SRCS)
STDPYTHON_SRCS := pyctype.c getargs.c formatter_string.c pystrtod.c dtoa.c formatter_unicode.c $(EXTRA_STDPYTHON_SRCS)
FROM_CPYTHON_SRCS := $(addprefix from_cpython/Modules/,$(STDMODULE_SRCS)) $(addprefix from_cpython/Objects/,$(STDOBJECT_SRCS)) $(addprefix from_cpython/Python/,$(STDPYTHON_SRCS))
# The stdlib objects have slightly longer dependency chains,
......
......@@ -18,10 +18,10 @@ add_custom_target(copy_stdlib ALL DEPENDS ${STDLIB_TARGETS})
file(GLOB_RECURSE STDMODULE_SRCS Modules errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c _codecsmodule.c socketmodule.c)
# compile specified files in from_cpython/Objects
file(GLOB_RECURSE STDOBJECT_SRCS Objects structseq.c capsule.c stringobject.c exceptions.c)
file(GLOB_RECURSE STDOBJECT_SRCS Objects structseq.c capsule.c stringobject.c exceptions.c unicodeobject.c unicodectype.c bytearrayobject.c bytes_methods.c)
# compile specified files in from_cpython/Python
file(GLOB_RECURSE STDPYTHON_SRCS Python getargs.c pyctype.c formatter_string.c pystrtod.c dtoa.c)
file(GLOB_RECURSE STDPYTHON_SRCS Python getargs.c pyctype.c formatter_string.c pystrtod.c dtoa.c formatter_unicode.c)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-missing-field-initializers -Wno-tautological-compare -Wno-type-limits")
add_library(FROM_CPYTHON OBJECT ${STDMODULE_SRCS} ${STDOBJECT_SRCS} ${STDPYTHON_SRCS})
......@@ -1184,8 +1184,8 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
*/
#define PySequence_Fast_ITEMS(sf) \
(PyList_Check(sf) ? ((PyListObject *)(sf))->ob_item \
: ((PyTupleObject *)(sf))->ob_item)
(PyList_Check(sf) ? (PyList_Items(sf)) \
: (PyTuple_Items(sf)))
/* Return a pointer to the underlying item array for
an object returned by PySequence_Fast */
......
......@@ -70,6 +70,9 @@ PyAPI_FUNC(int) PyList_Reverse(PyObject *) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyList_AsTuple(PyObject *) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) _PyList_Extend(PyListObject *, PyObject *) PYSTON_NOEXCEPT;
// Pyston addition:
PyAPI_FUNC(PyObject **) PyList_Items(PyObject *) PYSTON_NOEXCEPT;
/* Macro, trading safety for speed */
// Pyston changes: these aren't direct macros any more [they potentially could be though]
#define PyList_GET_ITEM(op, i) PyList_GetItem((PyObject*)(op), (i))
......
......@@ -99,6 +99,9 @@ PyAPI_FUNC(void) PyString_InternImmortal(PyObject **) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *) PYSTON_NOEXCEPT;
PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void) PYSTON_NOEXCEPT;
// Pyston addition:
PyAPI_FUNC(char) PyString_GetItem(PyObject *, Py_ssize_t) PYSTON_NOEXCEPT;
/* Use only if you know it's a string */
#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate)
......
......@@ -59,6 +59,9 @@ PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...) PYSTON_NOEXCEPT;
PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *) PYSTON_NOEXCEPT;
// Pyston addition:
PyAPI_FUNC(PyObject **) PyTuple_Items(PyObject *) PYSTON_NOEXCEPT;
/* Macro, trading safety for speed */
// Pyston changes: these aren't direct macros any more [they potentially could be though]
#define PyTuple_GET_ITEM(op, i) PyTuple_GetItem(op, i)
......
......@@ -414,8 +414,6 @@ extern "C" {
/* --- Unicode Type ------------------------------------------------------- */
// Pyston change: this is not our object format
#if 0
typedef struct {
PyObject_HEAD
Py_ssize_t length; /* Length of raw Unicode data in buffer */
......@@ -425,14 +423,8 @@ typedef struct {
string, or NULL; this is used for
implementing the buffer protocol */
} PyUnicodeObject;
#endif
struct _PyUnicodeObject;
typedef struct _PyUnicodeObject PyUnicodeObject;
// Pyston change: this is no longer a static object
PyAPI_DATA(PyTypeObject*) unicode_cls;
#define PyUnicode_Type (*unicode_cls)
//PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
// Pyston changes: these aren't direct macros any more [they potentially could be though]
PyAPI_FUNC(bool) _PyUnicode_Check(PyObject*) PYSTON_NOEXCEPT;
......@@ -443,8 +435,6 @@ PyAPI_FUNC(bool) _PyUnicode_Check(PyObject*) PYSTON_NOEXCEPT;
#endif
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
// Pyston changes: these aren't direct macros any more [they potentially could be though]
#if 0
/* Fast access macros */
#define PyUnicode_GET_SIZE(op) \
(((PyUnicodeObject *)(op))->length)
......@@ -454,11 +444,6 @@ PyAPI_FUNC(bool) _PyUnicode_Check(PyObject*) PYSTON_NOEXCEPT;
(((PyUnicodeObject *)(op))->str)
#define PyUnicode_AS_DATA(op) \
((const char *)((PyUnicodeObject *)(op))->str)
#endif
PyAPI_FUNC(Py_ssize_t) PyUnicode_GET_SIZE(PyObject*) PYSTON_NOEXCEPT;
PyAPI_FUNC(Py_ssize_t) PyUnicode_GET_DATA_SIZE(PyObject*) PYSTON_NOEXCEPT;
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AS_UNICODE(PyObject*) PYSTON_NOEXCEPT;
PyAPI_FUNC(const char *) PyUnicode_AS_DATA(PyObject*) PYSTON_NOEXCEPT;
/* --- Constants ---------------------------------------------------------- */
......
......@@ -34,7 +34,8 @@ _getbytevalue(PyObject* arg, int *value)
PyErr_SetString(PyExc_ValueError, "string must be of size 1");
return 0;
}
*value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
// Pyston change, was: *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
*value = Py_CHARMASK(PyString_GetItem(arg, 0));
return 1;
}
else if (PyInt_Check(arg) || PyLong_Check(arg)) {
......@@ -2893,7 +2894,8 @@ Construct a zero-initialized bytearray of the given length.");
static PyObject *bytearray_iter(PyObject *seq);
PyTypeObject PyByteArray_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
// Pyston change, was &PyType_Type:
PyVarObject_HEAD_INIT(NULL, 0)
"bytearray",
sizeof(PyByteArrayObject),
0,
......@@ -3002,7 +3004,8 @@ static PyMethodDef bytearrayiter_methods[] = {
};
PyTypeObject PyByteArrayIter_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
// Pyston change, was &PyType_Type:
PyVarObject_HEAD_INIT(NULL, 0)
"bytearray_iterator", /* tp_name */
sizeof(bytesiterobject), /* tp_basicsize */
0, /* tp_itemsize */
......
......@@ -1025,8 +1025,7 @@ done:
/************************************************************************/
/* this is the main entry point */
// Pyston change: changed to non-static
/* static */ PyObject *
static PyObject *
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
{
SubString input;
......
......@@ -7,3 +7,12 @@
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
#include "stringlib/localeutil.h"
// do_string_format has to stay a static function, since it's used by both stringobject.c
// and unicodeobject.c. We want to access it from str.cpp, though, so just use this little
// forwarding function: it passes self/args/kwargs through unchanged and returns whatever
// do_string_format returns.
// We could also potentially have tried to modify string_format.h to choose whether to mark the
// function as static or not.
PyObject * _do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) {
return do_string_format(self, args, kwargs);
}
......@@ -423,7 +423,7 @@ int _PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length)
return -1;
}
v = *unicode;
if (v == NULL || !PyUnicode_Check(v) || Py_REFCNT(v) != 1 || length < 0) {
if (v == NULL || !PyUnicode_Check(v) || /* Pyston change, can't check this: Py_REFCNT(v) != 1 || */ length < 0) {
PyErr_BadInternalCall();
return -1;
}
......@@ -8140,7 +8140,7 @@ formatfloat(PyObject *v, int flags, int prec, int type)
static PyObject*
formatlong(PyObject *val, int flags, int prec, int type)
{
char *buf;
const char *buf;
int i, len;
PyObject *str; /* temporary string object. */
PyUnicodeObject *result;
......@@ -8833,6 +8833,9 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return (PyObject *)pnew;
}
// Pyston change: Leave this in as a reminder in case we want to go back to using it:
#define Py_TPFLAGS_UNICODE_SUBCLASS (0)
PyDoc_STRVAR(unicode_doc,
"unicode(object='') -> unicode object\n\
unicode(string[, encoding[, errors]]) -> unicode object\n\
......@@ -8842,7 +8845,8 @@ encoding defaults to the current default string encoding.\n\
errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.");
PyTypeObject PyUnicode_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
// Pyston change, was &PyType_Type:
PyVarObject_HEAD_INIT(NULL, 0)
"unicode", /* tp_name */
sizeof(PyUnicodeObject), /* tp_size */
0, /* tp_itemsize */
......@@ -8874,7 +8878,8 @@ PyTypeObject PyUnicode_Type = {
unicode_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
&PyBaseString_Type, /* tp_base */
// Pyston change, this was &PyBaseString_Type; we set this explicitly in _PyUnicode_Init:
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
......@@ -8901,6 +8906,15 @@ void _PyUnicode_Init(void)
0x2029, /* PARAGRAPH SEPARATOR */
};
// Pyston change: we didn't set this above
assert(&PyBaseString_Type != NULL);
PyUnicode_Type.tp_base = &PyBaseString_Type;
// Pyston change: moved this above the initial call to _PyUnicode_New
// in order to register the static object with the GC:
if (PyType_Ready(&PyUnicode_Type) < 0)
Py_FatalError("Can't initialize 'unicode'");
/* Init the implementation */
if (!unicode_empty) {
unicode_empty = _PyUnicode_New(0);
......@@ -8908,9 +8922,6 @@ void _PyUnicode_Init(void)
return;
}
if (PyType_Ready(&PyUnicode_Type) < 0)
Py_FatalError("Can't initialize 'unicode'");
/* initialize the linebreak bloom filter */
bloom_linebreak = make_bloom_mask(
linebreak, sizeof(linebreak) / sizeof(linebreak[0])
......
......@@ -27,6 +27,14 @@
namespace pyston {
// Stub: not implemented yet. Both arguments are ignored; any call goes straight to
// Py_FatalError("unimplemented") and no length hint is computed.
extern "C" Py_ssize_t _PyObject_LengthHint(PyObject* o, Py_ssize_t defaultvalue) noexcept {
Py_FatalError("unimplemented");
}
// Stub: not implemented yet. All arguments are ignored; any call goes straight to
// Py_FatalError("unimplemented") and nothing is copied into `buf`.
extern "C" int PyBuffer_ToContiguous(void* buf, Py_buffer* view, Py_ssize_t len, char fort) noexcept {
Py_FatalError("unimplemented");
}
static PyObject* type_error(const char* msg, PyObject* obj) noexcept {
PyErr_Format(PyExc_TypeError, msg, Py_TYPE(obj)->tp_name);
return NULL;
......
......@@ -26,6 +26,10 @@
namespace pyston {
extern "C" {
_Py_HashSecret_t _Py_HashSecret;
}
// Stub: not implemented yet. `v` is ignored; any call goes straight to
// Py_FatalError("unimplemented") and no object is produced.
extern "C" PyObject* PyObject_Unicode(PyObject* v) noexcept {
Py_FatalError("unimplemented");
}
......
......@@ -1015,6 +1015,8 @@ void setupBuiltins() {
boxRTFunction((void*)vars, UNKNOWN, 1, 1, false, false), "vars", { NULL }));
builtins_module->giveAttr("object", object_cls);
builtins_module->giveAttr("str", str_cls);
assert(unicode_cls);
builtins_module->giveAttr("unicode", unicode_cls);
builtins_module->giveAttr("basestring", basestring_cls);
// builtins_module->giveAttr("unicode", unicode_cls);
builtins_module->giveAttr("int", int_cls);
......
......@@ -33,6 +33,11 @@ namespace pyston {
BoxedModule* sys_module;
BoxedDict* sys_modules_dict;
extern "C" {
// supposed to be exposed through sys.flags
int Py_BytesWarningFlag = 0;
}
Box* sysExcInfo() {
ExcInfo* exc = getFrameExcInfo();
assert(exc->type);
......
......@@ -154,11 +154,6 @@ extern "C" PyVarObject* PyObject_InitVar(PyVarObject* op, PyTypeObject* tp, Py_s
return op;
}
extern "C" void PyObject_Free(void* p) noexcept {
gc::gc_free(p);
ASSERT(0, "I think this is good enough but I'm not sure; should test");
}
extern "C" PyObject* PyObject_Format(PyObject* obj, PyObject* format_spec) noexcept {
PyObject* empty = NULL;
PyObject* result = NULL;
......@@ -883,6 +878,18 @@ extern "C" PyObject* PyCallIter_New(PyObject* callable, PyObject* sentinel) noex
Py_FatalError("unimplemented");
}
// C API allocation entry point: forwards the requested size unchanged to
// gc_compat_malloc and returns its result.
extern "C" void* PyObject_Malloc(size_t sz) noexcept {
return gc_compat_malloc(sz);
}
// C API reallocation entry point: forwards the pointer and new size unchanged to
// gc_compat_realloc and returns its result.
extern "C" void* PyObject_Realloc(void* ptr, size_t sz) noexcept {
return gc_compat_realloc(ptr, sz);
}
// C API deallocation entry point: hands the pointer unchanged to gc_compat_free.
extern "C" void PyObject_Free(void* ptr) noexcept {
gc_compat_free(ptr);
}
// C API raw-memory allocation entry point: forwards the requested size unchanged to
// gc_compat_malloc (the same helper PyObject_Malloc uses) and returns its result.
extern "C" void* PyMem_Malloc(size_t sz) noexcept {
return gc_compat_malloc(sz);
}
......@@ -1165,22 +1172,6 @@ extern "C" Py_ssize_t PyNumber_AsSsize_t(PyObject* o, PyObject* exc) noexcept {
return n;
}
extern "C" Py_ssize_t PyUnicode_GET_SIZE(PyObject*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" Py_UNICODE* PyUnicode_AS_UNICODE(PyObject*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" const char* PyUnicode_AS_DATA(PyObject*) noexcept {
Py_FatalError("unimplemented");
}
// Stub: not implemented yet. Both arguments are ignored; any call goes straight to
// Py_FatalError("unimplemented") and no contiguity answer is computed.
extern "C" int PyBuffer_IsContiguous(Py_buffer* view, char fort) noexcept {
Py_FatalError("unimplemented");
}
......@@ -1484,6 +1475,10 @@ extern "C" int _PyEval_SliceIndex(PyObject* v, Py_ssize_t* pi) noexcept {
return 1;
}
// Stub: not implemented yet. Both arguments are ignored; any call goes straight to
// Py_FatalError("unimplemented") and no buffer object is created.
extern "C" PyObject* PyBuffer_FromMemory(void* ptr, Py_ssize_t size) noexcept {
Py_FatalError("unimplemented");
}
BoxedModule* importTestExtension(const std::string& name) {
std::string pathname_name = "test/test_extension/" + name + ".pyston.so";
const char* pathname = pathname_name.c_str();
......
......@@ -39,6 +39,16 @@ extern "C" int PyList_Append(PyObject* op, PyObject* newitem) noexcept {
return 0;
}
// Stub: not implemented yet. `v` is ignored; any call goes straight to
// Py_FatalError("unimplemented") and the list is left untouched.
extern "C" int PyList_Reverse(PyObject* v) noexcept {
Py_FatalError("unimplemented");
}
// Pyston addition: hands C code a pointer to the list's backing element array.
// Release-asserts that `op` is a list, then returns the address of element 0
// inside the list's `elts` storage.
// NOTE(review): this goes through list->elts without a NULL check and without
// taking the list lock that listRepr takes -- confirm elts is always allocated
// (including for empty lists) and that callers do their own synchronization.
extern "C" PyObject** PyList_Items(PyObject* op) noexcept {
    RELEASE_ASSERT(PyList_Check(op), "");
    BoxedList* list = static_cast<BoxedList*>(op);
    return &list->elts->elts[0];
}
extern "C" Box* listRepr(BoxedList* self) {
LOCK_REGION(self->lock.asRead());
......
......@@ -47,6 +47,11 @@ BoxedString::BoxedString(const std::string& s) : s(s) {
gc::registerGCManagedBytes(this->s.size());
}
extern "C" char PyString_GetItem(PyObject* op, ssize_t n) noexcept {
RELEASE_ASSERT(PyString_Check(op), "");
return static_cast<const BoxedString*>(op)->s[n];
}
extern "C" PyObject* PyString_FromFormatV(const char* format, va_list vargs) noexcept {
va_list count;
Py_ssize_t n = 0;
......@@ -1552,13 +1557,13 @@ Box* strPartition(BoxedString* self, BoxedString* sep) {
self->s.size() - found_idx - sep->s.size()) });
}
extern "C" PyObject* do_string_format(PyObject* self, PyObject* args, PyObject* kwargs);
extern "C" PyObject* _do_string_format(PyObject* self, PyObject* args, PyObject* kwargs);
Box* strFormat(BoxedString* self, BoxedTuple* args, BoxedDict* kwargs) {
assert(args->cls == tuple_cls);
assert(kwargs->cls == dict_cls);
Box* rtn = do_string_format(self, args, kwargs);
Box* rtn = _do_string_format(self, args, kwargs);
checkAndThrowCAPIException();
assert(rtn);
return rtn;
......
......@@ -68,6 +68,12 @@ Box* tupleGetitemInt(BoxedTuple* self, BoxedInt* slice) {
return tupleGetitemUnboxed(self, slice->n);
}
// Pyston addition: hands C code a pointer to the tuple's backing element storage.
// Release-asserts that `op` is a tuple, then returns the address of element 0.
// NOTE(review): for an empty tuple this still evaluates elts[0] -- confirm callers
// only dereference the result when the tuple has at least one element.
extern "C" PyObject** PyTuple_Items(PyObject* op) noexcept {
    RELEASE_ASSERT(PyTuple_Check(op), "");
    BoxedTuple* tuple = static_cast<BoxedTuple*>(op);
    return &tuple->elts[0];
}
extern "C" PyObject* PyTuple_GetItem(PyObject* op, Py_ssize_t i) noexcept {
RELEASE_ASSERT(PyTuple_Check(op), "");
RELEASE_ASSERT(i >= 0, ""); // unlike tuple.__getitem__, PyTuple_GetItem doesn't do index wrapping
......
......@@ -64,6 +64,7 @@ extern "C" void initarray();
extern "C" void initzlib();
extern "C" void init_codecs();
extern "C" void init_socket();
extern "C" void _PyUnicode_Init();
namespace pyston {
......@@ -518,7 +519,7 @@ extern "C" void closureGCHandler(GCVisitor* v, Box* b) {
extern "C" {
BoxedClass* object_cls, *type_cls, *none_cls, *bool_cls, *int_cls, *float_cls,
* str_cls = NULL, *function_cls, *instancemethod_cls, *list_cls, *slice_cls, *module_cls, *dict_cls, *tuple_cls,
*file_cls, *member_cls, *closure_cls, *generator_cls, *complex_cls, *basestring_cls, *unicode_cls, *property_cls,
*file_cls, *member_cls, *closure_cls, *generator_cls, *complex_cls, *basestring_cls, *property_cls,
*staticmethod_cls, *classmethod_cls, *attrwrapper_cls, *pyston_getset_cls, *capi_getset_cls,
*builtin_function_or_method_cls;
......@@ -1217,8 +1218,6 @@ void setupRuntime() {
gc::enableGC();
unicode_cls = new BoxedHeapClass(basestring_cls, NULL, 0, sizeof(BoxedUnicode), false, "unicode");
// It wasn't safe to add __base__ attributes until object+type+str are set up, so do that now:
type_cls->giveAttr("__base__", object_cls);
basestring_cls->giveAttr("__base__", object_cls);
......@@ -1336,7 +1335,7 @@ void setupRuntime() {
setupIter();
setupClassobj();
setupSuper();
setupUnicode();
_PyUnicode_Init();
setupDescr();
setupTraceback();
......
......@@ -62,7 +62,6 @@ void teardownFile();
void setupCAPI();
void teardownCAPI();
void setupGenerator();
void setupUnicode();
void setupDescr();
void teardownDescr();
......@@ -81,9 +80,11 @@ extern "C" {
extern BoxedClass* object_cls, *type_cls, *bool_cls, *int_cls, *long_cls, *float_cls, *str_cls, *function_cls,
*none_cls, *instancemethod_cls, *list_cls, *slice_cls, *module_cls, *dict_cls, *tuple_cls, *file_cls,
*enumerate_cls, *xrange_cls, *member_cls, *method_cls, *closure_cls, *generator_cls, *complex_cls, *basestring_cls,
*unicode_cls, *property_cls, *staticmethod_cls, *classmethod_cls, *attrwrapper_cls, *pyston_getset_cls,
*capi_getset_cls, *builtin_function_or_method_cls;
*property_cls, *staticmethod_cls, *classmethod_cls, *attrwrapper_cls, *pyston_getset_cls, *capi_getset_cls,
*builtin_function_or_method_cls;
}
#define unicode_cls (&PyUnicode_Type)
extern "C" {
extern Box* None, *NotImplemented, *True, *False;
}
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment