Unmodified CPython versions of some unicode files

f4b1b16a · Kevin Modzelewski · 077fffc6 · f4b1b16a · f4b1b16a · f4b1b16a
Commit f4b1b16a authored Feb 24, 2015 by Kevin Modzelewski
10 changed files
--- a/from_cpython/Include/Python.h
+++ b/from_cpython/Include/Python.h
@@ -58,7 +58,9 @@
 #include "complexobject.h"
 #endif
 #include "stringobject.h"
+#include "bufferobject.h"
 #include "bytesobject.h"
+#include "bytearrayobject.h"
 #include "listobject.h"
 #include "dictobject.h"
 #include "tupleobject.h"

--- a/from_cpython/Include/bufferobject.h
+++ b/from_cpython/Include/bufferobject.h
+// This file is originally from CPython 2.7, with modifications for Pyston
+
+/* Buffer object interface */
+
+/* Note: the object's structure is private */
+
+#ifndef Py_BUFFEROBJECT_H
+#define Py_BUFFEROBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+PyAPI_DATA(PyTypeObject) PyBuffer_Type;
+
+/* Exact-type test: true only when op's type pointer is &PyBuffer_Type
+   itself (a plain pointer comparison; no subtype check is made). */
+#define PyBuffer_Check(op) (Py_TYPE(op) == &PyBuffer_Type)
+
+/* Sentinel value -1.  NOTE(review): presumably passed as a `size` argument
+   to the buffer constructors to mean "up to the end of the base object",
+   as in CPython 2.7 -- confirm against the implementation. */
+#define Py_END_OF_BUFFER	(-1)
+
+/* Buffer object constructors.  Every entry point returns a PyObject * and
+   is declared PYSTON_NOEXCEPT.
+   NOTE(review): presumably the *ReadWrite* variants produce writable
+   buffers and all of these return NULL on failure, as in CPython 2.7 --
+   the implementations are not part of this header, so confirm there. */
+
+/* Build a buffer from an existing object, given an offset and a size. */
+PyAPI_FUNC(PyObject *) PyBuffer_FromObject(PyObject *base,
+                                           Py_ssize_t offset, Py_ssize_t size) PYSTON_NOEXCEPT;
+PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteObject(PyObject *base,
+                                                    Py_ssize_t offset,
+                                                    Py_ssize_t size) PYSTON_NOEXCEPT;
+
+/* Build a buffer from a raw pointer and a size. */
+PyAPI_FUNC(PyObject *) PyBuffer_FromMemory(void *ptr, Py_ssize_t size) PYSTON_NOEXCEPT;
+PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size) PYSTON_NOEXCEPT;
+
+/* Build a buffer from a size alone. */
+PyAPI_FUNC(PyObject *) PyBuffer_New(Py_ssize_t size) PYSTON_NOEXCEPT;
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_BUFFEROBJECT_H */
--- a/from_cpython/Include/bytearrayobject.h
+++ b/from_cpython/Include/bytearrayobject.h
+// This file is originally from CPython 2.7, with modifications for Pyston
+
+/* ByteArray object interface */
+
+#ifndef Py_BYTEARRAYOBJECT_H
+#define Py_BYTEARRAYOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdarg.h>
+
+/* Type PyByteArrayObject represents a mutable array of bytes.
+ * The Python API is that of a sequence;
+ * the bytes are mapped to ints in [0, 256).
+ * Bytes are not characters; they may be used to encode characters.
+ * The only way to go between bytes and str/unicode is via encoding
+ * and decoding.
+ * For the convenience of C programmers, the bytes type is considered
+ * to contain a char pointer, not an unsigned char pointer.
+ */
+
+/* Object layout */
+typedef struct {
+    /* Variable-size object header; the macros in this header read the
+       current length through Py_SIZE(). */
+    PyObject_VAR_HEAD
+    /* XXX(nnorwitz): should ob_exports be Py_ssize_t? */
+    int ob_exports; /* how many buffer exports */
+    Py_ssize_t ob_alloc; /* How many bytes allocated */
+    /* Byte storage; this is what PyByteArray_AS_STRING yields when
+       Py_SIZE() is non-zero. */
+    char *ob_bytes;
+} PyByteArrayObject;
+
+/* Type object */
+PyAPI_DATA(PyTypeObject) PyByteArray_Type;
+PyAPI_DATA(PyTypeObject) PyByteArrayIter_Type;
+
+/* Type check macros */
+/* PyByteArray_Check goes through PyObject_TypeCheck against
+   PyByteArray_Type; PyByteArray_CheckExact is a plain pointer comparison of
+   Py_TYPE(self) with &PyByteArray_Type.
+   NOTE(review): PyObject_TypeCheck presumably also accepts subtypes -- its
+   definition is not in this header, confirm there. */
+#define PyByteArray_Check(self) PyObject_TypeCheck(self, &PyByteArray_Type)
+#define PyByteArray_CheckExact(self) (Py_TYPE(self) == &PyByteArray_Type)
+
+/* Direct API functions */
+/* All of these are declared PYSTON_NOEXCEPT.
+   NOTE(review): presumably the PyObject * / char * returning functions
+   report failure with NULL and PyByteArray_Resize with a negative int, as
+   in CPython 2.7 -- the implementations are not visible here, confirm. */
+PyAPI_FUNC(PyObject *) PyByteArray_FromObject(PyObject *) PYSTON_NOEXCEPT;
+PyAPI_FUNC(PyObject *) PyByteArray_Concat(PyObject *, PyObject *) PYSTON_NOEXCEPT;
+PyAPI_FUNC(PyObject *) PyByteArray_FromStringAndSize(const char *, Py_ssize_t) PYSTON_NOEXCEPT;
+PyAPI_FUNC(Py_ssize_t) PyByteArray_Size(PyObject *) PYSTON_NOEXCEPT;
+PyAPI_FUNC(char *) PyByteArray_AsString(PyObject *) PYSTON_NOEXCEPT;
+PyAPI_FUNC(int) PyByteArray_Resize(PyObject *, Py_ssize_t) PYSTON_NOEXCEPT;
+
+/* Macros, trading safety for speed */
+/* Yields ob_bytes when Py_SIZE(self) is non-zero and
+   _PyByteArray_empty_string otherwise.  The type is only checked through
+   assert(), and `self` appears several times in the expansion, so an
+   argument with side effects may be evaluated more than once. */
+#define PyByteArray_AS_STRING(self) \
+    (assert(PyByteArray_Check(self)), \
+     Py_SIZE(self) ? ((PyByteArrayObject *)(self))->ob_bytes : _PyByteArray_empty_string)
+/* Yields Py_SIZE(self); the type is only checked through assert(), and
+   `self` appears twice in the expansion. */
+#define PyByteArray_GET_SIZE(self)  (assert(PyByteArray_Check(self)),Py_SIZE(self))
+
+/* Fallback storage used by PyByteArray_AS_STRING for size-zero objects.
+   Declaring it after the macro is fine: the name is only looked up where
+   the macro is expanded. */
+PyAPI_DATA(char) _PyByteArray_empty_string[];
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_BYTEARRAYOBJECT_H */
--- a/from_cpython/Include/bytes_methods.h
+++ b/from_cpython/Include/bytes_methods.h
+// This file is originally from CPython 2.7, with modifications for Pyston
+
+#ifndef Py_BYTES_CTYPE_H
+#define Py_BYTES_CTYPE_H
+
+/*
+ * The internal implementations behind the PyString (bytes) and PyBytes
+ * (buffer) methods of the given names; they operate on ASCII byte strings.
+ */
+/* Predicates: each takes a read-only byte pointer plus a length and returns
+   a PyObject *.
+   NOTE(review): presumably the result is a Python bool object -- confirm
+   against the definitions, which are not part of this header. */
+extern PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern PyObject* _Py_bytes_istitle(const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
+
+/* These store their len sized answer in the given preallocated *result arg. */
+/* Note the asymmetry in the input parameter: lower/upper take a
+   `const char *cptr`, while title/capitalize/swapcase take a non-const
+   `char *s`.  Callers holding only a const pointer need a cast for the
+   latter three. */
+extern void _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern void _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern void _Py_bytes_title(char *result, char *s, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern void _Py_bytes_capitalize(char *result, char *s, Py_ssize_t len) PYSTON_NOEXCEPT;
+extern void _Py_bytes_swapcase(char *result, char *s, Py_ssize_t len) PYSTON_NOEXCEPT;
+
+/* Shared __doc__ strings. */
+/* Declarations only; one array per method named above.
+   NOTE(review): the definitions are presumably produced with
+   PyDoc_STRVAR_shared in the matching .c file -- confirm. */
+extern const char _Py_isspace__doc__[];
+extern const char _Py_isalpha__doc__[];
+extern const char _Py_isalnum__doc__[];
+extern const char _Py_isdigit__doc__[];
+extern const char _Py_islower__doc__[];
+extern const char _Py_isupper__doc__[];
+extern const char _Py_istitle__doc__[];
+extern const char _Py_lower__doc__[];
+extern const char _Py_upper__doc__[];
+extern const char _Py_title__doc__[];
+extern const char _Py_capitalize__doc__[];
+extern const char _Py_swapcase__doc__[];
+
+/* These are left in for backward compatibility and will be removed
+   in 2.8/3.2 */
+/* Each unprefixed name simply forwards to its Py_-prefixed counterpart. */
+#define ISLOWER(c)  Py_ISLOWER(c)
+#define ISUPPER(c)  Py_ISUPPER(c)
+#define ISALPHA(c)  Py_ISALPHA(c)
+#define ISDIGIT(c)  Py_ISDIGIT(c)
+#define ISXDIGIT(c) Py_ISXDIGIT(c)
+#define ISALNUM(c)  Py_ISALNUM(c)
+#define ISSPACE(c)  Py_ISSPACE(c)
+
+/* Redefine the <ctype.h> classification names so that any later use expands
+   to an undefined_* identifier, which this header never declares.
+   NOTE(review): presumably this is meant to make accidental use of the
+   locale-dependent C library versions fail at build time in favour of the
+   Py_IS* macros -- confirm. */
+#undef islower
+#define islower(c) undefined_islower(c)
+#undef isupper
+#define isupper(c) undefined_isupper(c)
+#undef isalpha
+#define isalpha(c) undefined_isalpha(c)
+#undef isdigit
+#define isdigit(c) undefined_isdigit(c)
+#undef isxdigit
+#define isxdigit(c) undefined_isxdigit(c)
+#undef isalnum
+#define isalnum(c) undefined_isalnum(c)
+#undef isspace
+#define isspace(c) undefined_isspace(c)
+
+/* These are left in for backward compatibility and will be removed
+   in 2.8/3.2 */
+/* Each unprefixed name simply forwards to its Py_-prefixed counterpart. */
+#define TOLOWER(c) Py_TOLOWER(c)
+#define TOUPPER(c) Py_TOUPPER(c)
+
+/* Same treatment as the classification names: tolower/toupper are
+   redefined to expand to undefined_* identifiers that this header never
+   declares.  NOTE(review): presumably to force use of Py_TOLOWER /
+   Py_TOUPPER instead of the C library versions -- confirm. */
+#undef tolower
+#define tolower(c) undefined_tolower(c)
+#undef toupper
+#define toupper(c) undefined_toupper(c)
+
+/* this is needed because some docs are shared from the .o, not static */
+/* Expands to a definition of `name` as a const char array initialised from
+   PyDoc_STR(str); no `static` is applied, so the storage class is whatever
+   the point of use gives it. */
+#define PyDoc_STRVAR_shared(name,str) const char name[] = PyDoc_STR(str)
+
+#endif /* !Py_BYTES_CTYPE_H */
--- a/from_cpython/Include/ucnhash.h
+++ b/from_cpython/Include/ucnhash.h
+// This file is originally from CPython 2.7, with modifications for Pyston
+
+/* Unicode name database interface */
+
+#ifndef Py_UCNHASH_H
+#define Py_UCNHASH_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* revised ucnhash CAPI interface (exported through a "wrapper") */
+
+#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI"
+
+/* Table of function pointers for Unicode name lookups.
+   NOTE(review): presumably published through a capsule named
+   PyUnicodeData_CAPSULE_NAME -- the exporting code is not in this header,
+   confirm there. */
+typedef struct {
+
+    /* Size of this struct */
+    int size;
+
+    /* Get name for a given character code.  Returns non-zero if
+       success, zero if not.  Does not set Python exceptions.
+       If self is NULL, data come from the default version of the database.
+       If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
+    int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen);
+
+    /* Get character code for a given name.  Same error handling
+       as for getname. */
+    int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code);
+
+} _PyUnicode_Name_CAPI;
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_UCNHASH_H */
--- a/from_cpython/Objects/bytearrayobject.c
+++ b/from_cpython/Objects/bytearrayobject.c
--- a/from_cpython/Objects/bytes_methods.c
+++ b/from_cpython/Objects/bytes_methods.c
--- a/from_cpython/Objects/unicodectype.c
+++ b/from_cpython/Objects/unicodectype.c
+// This file is originally from CPython 2.7, with modifications for Pyston
+
+/*
+   Unicode character type helpers.
+
+   Written by Marc-Andre Lemburg (mal@lemburg.com).
+   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
+
+   Copyright (c) Corporation for National Research Initiatives.
+
+*/
+
+#include "Python.h"
+#include "unicodeobject.h"
+
+/* Bit flags tested against the `flags` field of _PyUnicode_TypeRecord.
+   NODELTA_MASK is special: when it is set, the case-mapping functions in
+   this file return the record's upper/lower/title field directly instead
+   of adding it to the input character. */
+#define ALPHA_MASK 0x01
+#define DECIMAL_MASK 0x02
+#define DIGIT_MASK 0x04
+#define LOWER_MASK 0x08
+#define LINEBREAK_MASK 0x10
+#define SPACE_MASK 0x20
+#define TITLE_MASK 0x40
+#define UPPER_MASK 0x80
+#define NODELTA_MASK 0x100
+#define NUMERIC_MASK 0x200
+
+/* One entry of the character type table that gettyperecord() indexes. */
+typedef struct {
+    /* Case mappings, read by _PyUnicode_ToUppercase / ToLowercase /
+       ToTitlecase: either the result itself (NODELTA_MASK set in flags) or
+       an offset added to the input character. */
+    const Py_UNICODE upper;
+    const Py_UNICODE lower;
+    const Py_UNICODE title;
+    /* Value returned by _PyUnicode_ToDecimalDigit when DECIMAL_MASK is set. */
+    const unsigned char decimal;
+    /* Value returned by _PyUnicode_ToDigit when DIGIT_MASK is set. */
+    const unsigned char digit;
+    /* Combination of the *_MASK bits. */
+    const unsigned short flags;
+} _PyUnicode_TypeRecord;
+
+#include "unicodetype_db.h"
+
+/* Find the type record for a character.
+ *
+ * The lookup is two-staged: index1 is indexed by the bits of `code` above
+ * SHIFT, and its result, combined with the low SHIFT bits of `code`, indexes
+ * index2.  On Py_UNICODE_WIDE builds, values of 0x110000 and above skip the
+ * tables and use record 0.
+ */
+static const _PyUnicode_TypeRecord *
+gettyperecord(Py_UNICODE code)
+{
+    int slot = 0;
+
+#ifdef Py_UNICODE_WIDE
+    if (code < 0x110000)
+#endif
+    {
+        int page = index1[(code >> SHIFT)];
+
+        slot = index2[(page << SHIFT) + (code & ((1 << SHIFT) - 1))];
+    }
+
+    return &_PyUnicode_TypeRecords[slot];
+}
+
+/* Returns the titlecase Unicode characters corresponding to ch or just
+   ch if no titlecase mapping is known. */
+
+Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
+{
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);
+    int field = rec->title;
+
+    /* With NODELTA_MASK set, the stored field already is the result. */
+    if ((rec->flags & NODELTA_MASK) != 0)
+        return field;
+
+    /* Otherwise the field is an offset from ch; stored values of 32768 and
+       above stand for negative offsets (value - 65536). */
+    return ch + (field >= 32768 ? field - 65536 : field);
+}
+
+/* Returns 1 for Unicode characters having the category 'Lt', 0
+   otherwise. */
+
+int _PyUnicode_IsTitlecase(Py_UNICODE ch)
+{
+    /* Collapse the masked flag word to exactly 0 or 1. */
+    return (gettyperecord(ch)->flags & TITLE_MASK) ? 1 : 0;
+}
+
+/* Returns the integer decimal (0-9) for Unicode characters having
+   this property, -1 otherwise. */
+
+int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
+{
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);
+
+    /* The decimal field is only meaningful when DECIMAL_MASK is set. */
+    if (!(rec->flags & DECIMAL_MASK))
+        return -1;
+    return rec->decimal;
+}
+
+/* Returns 1 when _PyUnicode_ToDecimalDigit() yields a non-negative value
+   for ch, 0 otherwise. */
+int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
+{
+    return _PyUnicode_ToDecimalDigit(ch) >= 0;
+}
+
+/* Returns the integer digit (0-9) for Unicode characters having
+   this property, -1 otherwise. */
+
+int _PyUnicode_ToDigit(Py_UNICODE ch)
+{
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);
+
+    /* The digit field is only meaningful when DIGIT_MASK is set. */
+    if (!(rec->flags & DIGIT_MASK))
+        return -1;
+    return rec->digit;
+}
+
+/* Returns 1 when _PyUnicode_ToDigit() yields a non-negative value for ch,
+   0 otherwise. */
+int _PyUnicode_IsDigit(Py_UNICODE ch)
+{
+    return _PyUnicode_ToDigit(ch) >= 0;
+}
+
+/* Returns 1 for Unicode characters whose type record has the numeric
+   flag (NUMERIC_MASK) set, 0 otherwise. */
+
+int _PyUnicode_IsNumeric(Py_UNICODE ch)
+{
+    /* Only the flag is consulted here; no numeric value is computed. */
+    return (gettyperecord(ch)->flags & NUMERIC_MASK) ? 1 : 0;
+}
+
+#ifndef WANT_WCTYPE_FUNCTIONS
+
+/* Returns 1 for Unicode characters having the category 'Ll', 0
+   otherwise. */
+
+int _PyUnicode_IsLowercase(Py_UNICODE ch)
+{
+    /* Table-driven variant: test LOWER_MASK and normalise to 0 or 1. */
+    return (gettyperecord(ch)->flags & LOWER_MASK) ? 1 : 0;
+}
+
+/* Returns 1 for Unicode characters having the category 'Lu', 0
+   otherwise. */
+
+int _PyUnicode_IsUppercase(Py_UNICODE ch)
+{
+    /* Table-driven variant: test UPPER_MASK and normalise to 0 or 1. */
+    return (gettyperecord(ch)->flags & UPPER_MASK) ? 1 : 0;
+}
+
+/* Returns the uppercase Unicode characters corresponding to ch or just
+   ch if no uppercase mapping is known. */
+
+Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
+{
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);
+    int field = rec->upper;
+
+    if (rec->flags & NODELTA_MASK) {
+        /* The field holds the uppercase result itself, not an offset. */
+        return field;
+    }
+
+    /* Stored offsets of 32768 and above encode negative values. */
+    if (field > 32767) {
+        field = field - 65536;
+    }
+    return ch + field;
+}
+
+/* Returns the lowercase Unicode characters corresponding to ch or just
+   ch if no lowercase mapping is known. */
+
+Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
+{
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);
+    int field = rec->lower;
+
+    if (rec->flags & NODELTA_MASK) {
+        /* The field holds the lowercase result itself, not an offset. */
+        return field;
+    }
+
+    /* Stored offsets of 32768 and above encode negative values. */
+    if (field > 32767) {
+        field = field - 65536;
+    }
+    return ch + field;
+}
+
+/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
+   'Lo' or 'Lm',  0 otherwise. */
+
+int _PyUnicode_IsAlpha(Py_UNICODE ch)
+{
+    /* Table-driven variant: test ALPHA_MASK and normalise to 0 or 1. */
+    return (gettyperecord(ch)->flags & ALPHA_MASK) ? 1 : 0;
+}
+
+#else
+
+/* Export the interfaces using the wchar_t type for portability
+   reasons:  */
+
+/* wctype variant: forwards to the C library's iswlower(), so the result is
+   whatever zero / non-zero value that function reports rather than a
+   normalised 0 or 1. */
+int _PyUnicode_IsLowercase(Py_UNICODE ch)
+{
+    return iswlower(ch);
+}
+
+/* wctype variant: forwards to the C library's iswupper(), so the result is
+   whatever zero / non-zero value that function reports rather than a
+   normalised 0 or 1. */
+int _PyUnicode_IsUppercase(Py_UNICODE ch)
+{
+    return iswupper(ch);
+}
+
+/* wctype variant: forwards to the C library's towlower(); the result is
+   converted to Py_UNICODE on return. */
+Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
+{
+    return towlower(ch);
+}
+
+/* wctype variant: forwards to the C library's towupper(); the result is
+   converted to Py_UNICODE on return. */
+Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
+{
+    return towupper(ch);
+}
+
+/* wctype variant: forwards to the C library's iswalpha(), so the result is
+   whatever zero / non-zero value that function reports rather than a
+   normalised 0 or 1. */
+int _PyUnicode_IsAlpha(Py_UNICODE ch)
+{
+    return iswalpha(ch);
+}
+
+#endif
--- a/from_cpython/Objects/unicodeobject.c
+++ b/from_cpython/Objects/unicodeobject.c
--- a/from_cpython/Objects/unicodetype_db.h
+++ b/from_cpython/Objects/unicodetype_db.h