Commit 0bb39828 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Import a bunch more CPython code to get str.format() working

Starting to really hit critical mass in terms of having enough
API support that it's easier to copy additional API functions over.
parent 2ef8b444
......@@ -281,8 +281,8 @@ STDLIB_OBJS := stdlib.bc.o stdlib.stripped.bc.o
STDLIB_RELEASE_OBJS := stdlib.release.bc.o
STDMODULE_SRCS := errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c $(EXTRA_STDMODULE_SRCS)
STDOBJECT_SRCS := structseq.c capsule.c $(EXTRA_STDOBJECT_SRCS)
STDPYTHON_SRCS := pyctype.c getargs.c $(EXTRA_STDPYTHON_SRCS)
STDOBJECT_SRCS := structseq.c capsule.c stringobject.c $(EXTRA_STDOBJECT_SRCS)
STDPYTHON_SRCS := pyctype.c getargs.c formatter_string.c pystrtod.c dtoa.c $(EXTRA_STDPYTHON_SRCS)
FROM_CPYTHON_SRCS := $(addprefix lib_python/2.7_Modules/,$(STDMODULE_SRCS)) $(addprefix lib_python/2.7_Objects/,$(STDOBJECT_SRCS)) $(addprefix lib_python/2.7_Python/,$(STDPYTHON_SRCS))
# The stdlib objects have slightly longer dependency chains,
......@@ -919,7 +919,7 @@ watch: watch_pyston_dbg
watch_vim:
$(MAKE) watch WATCH_ARGS='COLOR=0 USE_DISTCC=0 -j1 2>&1 | tee compile.log'
wdbg_%:
$(MAKE) $(patsubst wdbg_%,watch_dbg_%,$@) GDB_CMDS="--ex quit"
$(MAKE) $(patsubst wdbg_%,watch_dbg_%,$@) GDB_POST_CMDS="--ex quit"
.PHONY: test_asm test_cpp_asm
test_asm:
......
......@@ -63,6 +63,7 @@
#include "dictobject.h"
#include "tupleobject.h"
#include "methodobject.h"
#include "classobject.h"
#include "fileobject.h"
#include "pycapsule.h"
#include "sliceobject.h"
......@@ -84,6 +85,8 @@
#include "abstract.h"
#include "pyctype.h"
#include "pystrtod.h"
#include "dtoa.h"
// directly from CPython:
/* Argument must be a char or an int in [-128, 127] or [0, 255]. */
......
// This file is originally from CPython 2.7, with modifications for Pyston
/* Class object interface */
/* Revealing some structures (not for general use) */
#ifndef Py_CLASSOBJECT_H
#define Py_CLASSOBJECT_H
#ifdef __cplusplus
extern "C" {
#endif
// Pyston change: this is not the format we're using
// The CPython 2.7 struct layouts inside this #if 0 region are compiled out;
// they are kept only as a reference for the original field names.
#if 0
typedef struct {
PyObject_HEAD
PyObject *cl_bases; /* A tuple of class objects */
PyObject *cl_dict; /* A dictionary */
PyObject *cl_name; /* A string */
/* The following three are functions or NULL */
PyObject *cl_getattr;
PyObject *cl_setattr;
PyObject *cl_delattr;
PyObject *cl_weakreflist; /* List of weak references */
} PyClassObject;
typedef struct {
PyObject_HEAD
PyClassObject *in_class; /* The class object */
PyObject *in_dict; /* A dictionary */
PyObject *in_weakreflist; /* List of weak references */
} PyInstanceObject;
typedef struct {
PyObject_HEAD
PyObject *im_func; /* The callable object implementing the method */
PyObject *im_self; /* The instance it is bound to, or NULL */
PyObject *im_class; /* The class that asked for the method */
PyObject *im_weakreflist; /* List of weak references */
} PyMethodObject;
#endif
// Opaque forward declarations: this header exposes the three type names
// without giving their layout.
// NOTE(review): the PyMethod_GET_* macros later in this header dereference
// im_func / im_self / im_class, so using them presumably requires a complete
// definition of struct _PyMethodObject from somewhere else -- confirm.
typedef struct _PyClassObject PyClassObject;
typedef struct _PyInstanceObject PyInstanceObject;
typedef struct _PyMethodObject PyMethodObject;
// Pyston change: these are not static objects any more
// The type objects are exported as pointers (classobj_cls, instance_cls,
// instancemethod_cls).  The Py*_Type macros dereference those pointers so
// that expressions such as `&PyClass_Type` still yield a PyTypeObject*.
PyAPI_DATA(PyTypeObject*) classobj_cls;
PyAPI_DATA(PyTypeObject*) instance_cls;
PyAPI_DATA(PyTypeObject*) instancemethod_cls;
#define PyClass_Type (*classobj_cls)
#define PyInstance_Type (*instance_cls)
#define PyMethod_Type (*instancemethod_cls)
// Pyston change: change these to use the Py_TYPE macro instead
// of looking at ob_type directly
// Each check is an exact type comparison (pointer equality against the
// type object); no subclass test is performed by these macros.
#define PyClass_Check(op) (Py_TYPE(op) == &PyClass_Type)
#define PyInstance_Check(op) (Py_TYPE(op) == &PyInstance_Type)
#define PyMethod_Check(op) (Py_TYPE(op) == &PyMethod_Type)
PyAPI_FUNC(PyObject *) PyClass_New(PyObject *, PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) PyInstance_New(PyObject *, PyObject *,
PyObject *);
PyAPI_FUNC(PyObject *) PyInstance_NewRaw(PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) PyMethod_New(PyObject *, PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) PyMethod_Function(PyObject *);
PyAPI_FUNC(PyObject *) PyMethod_Self(PyObject *);
PyAPI_FUNC(PyObject *) PyMethod_Class(PyObject *);
/* Look up attribute with name (a string) on instance object pinst, using
* only the instance and base class dicts. If a descriptor is found in
* a class dict, the descriptor is returned without calling it.
* Returns NULL if nothing found, else a borrowed reference to the
* value associated with name in the dict in which name was found.
* The point of this routine is that it never calls arbitrary Python
* code, so is always "safe": all it does is dict lookups. The function
* can't fail, never sets an exception, and NULL is not an error (it just
* means "not found").
*/
PyAPI_FUNC(PyObject *) _PyInstance_Lookup(PyObject *pinst, PyObject *name);
/* Macros for direct access to these values. Type checks are *not*
   done, so use with care.

   Each macro casts its argument to PyMethodObject* and reads one field
   (im_func, im_self or im_class).  The argument is parenthesized before
   the cast so that an expression argument (pointer arithmetic, a
   conditional, ...) is cast as a whole rather than only its first
   operand. */
#define PyMethod_GET_FUNCTION(meth) \
    (((PyMethodObject *)(meth)) -> im_func)
#define PyMethod_GET_SELF(meth) \
    (((PyMethodObject *)(meth)) -> im_self)
#define PyMethod_GET_CLASS(meth) \
    (((PyMethodObject *)(meth)) -> im_class)
PyAPI_FUNC(int) PyClass_IsSubclass(PyObject *, PyObject *);
PyAPI_FUNC(int) PyMethod_ClearFreeList(void);
#ifdef __cplusplus
}
#endif
#endif /* !Py_CLASSOBJECT_H */
......@@ -46,6 +46,7 @@ typedef struct {
Py_complex cval;
} PyComplexObject;
#endif
typedef struct _PyComplexObject PyComplexObject;
// Pyston change: this is not a static object any more
// PyAPI_DATA(PyTypeObject) PyComplex_Type;
......
......@@ -96,7 +96,9 @@ PyAPI_FUNC(PyObject *) PyDictProxy_New(PyObject *);
PyAPI_FUNC(PyObject *) PyWrapper_New(PyObject *, PyObject *);
PyAPI_DATA(PyTypeObject) PyProperty_Type;
// Pyston change: this is no longer a static object
//PyAPI_DATA(PyTypeObject) PyProperty_Type;
#ifdef __cplusplus
}
#endif
......
// This file is originally from CPython 2.7, with modifications for Pyston
#ifndef PY_NO_SHORT_FLOAT_REPR
#ifdef __cplusplus
extern "C" {
#endif
PyAPI_FUNC(double) _Py_dg_strtod(const char *str, char **ptr);
PyAPI_FUNC(char *) _Py_dg_dtoa(double d, int mode, int ndigits,
int *decpt, int *sign, char **rve);
PyAPI_FUNC(void) _Py_dg_freedtoa(char *s);
#ifdef __cplusplus
}
#endif
#endif
......@@ -49,7 +49,8 @@ struct _inittab {
void (*initfunc)(void);
};
PyAPI_DATA(PyTypeObject) PyNullImporter_Type;
// Pyston change: this is no longer a static object
//PyAPI_DATA(PyTypeObject) PyNullImporter_Type;
PyAPI_DATA(struct _inittab *) PyImport_Inittab;
PyAPI_FUNC(int) PyImport_AppendInittab(const char *name, void (*initfunc)(void));
......
......@@ -7,13 +7,15 @@
extern "C" {
#endif
PyAPI_DATA(PyTypeObject) PySeqIter_Type;
// Pyston change: this is no longer a static object
//PyAPI_DATA(PyTypeObject) PySeqIter_Type;
#define PySeqIter_Check(op) (Py_TYPE(op) == &PySeqIter_Type)
PyAPI_FUNC(PyObject *) PySeqIter_New(PyObject *);
PyAPI_DATA(PyTypeObject) PyCallIter_Type;
// Pyston change: this is no longer a static object
//PyAPI_DATA(PyTypeObject) PyCallIter_Type;
#define PyCallIter_Check(op) (Py_TYPE(op) == &PyCallIter_Type)
......
......@@ -12,7 +12,8 @@ extern "C" {
not Python methods in user-defined classes. See classobject.h
for the latter. */
PyAPI_DATA(PyTypeObject) PyCFunction_Type;
// Pyston change: this is no longer a static object
//PyAPI_DATA(PyTypeObject) PyCFunction_Type;
#define PyCFunction_Check(op) (Py_TYPE(op) == &PyCFunction_Type)
......
......@@ -495,8 +495,9 @@ PyAPI_FUNC(int) PyType_IsSubtype(PyTypeObject *, PyTypeObject *);
PyAPI_DATA(PyTypeObject*) type_cls;
#define PyType_Type (*type_cls)
PyAPI_DATA(PyTypeObject) PyBaseObject_Type; /* built-in 'object' */
PyAPI_DATA(PyTypeObject) PySuper_Type; /* built-in 'super' */
// Pyston change: this is no longer a static object
//PyAPI_DATA(PyTypeObject) PyBaseObject_Type; /* built-in 'object' */
//PyAPI_DATA(PyTypeObject) PySuper_Type; /* built-in 'super' */
// Pyston changes: these aren't direct macros any more [they potentially could be though]
PyAPI_FUNC(bool) PyType_Check(PyObject*);
......@@ -511,7 +512,8 @@ PyAPI_FUNC(PyObject *) PyType_GenericAlloc(PyTypeObject *, Py_ssize_t);
PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *,
PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *);
PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, char *, PyObject **);
// Pyston change: modified this to take a const char*
PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, const char *, PyObject **);
PyAPI_FUNC(unsigned int) PyType_ClearCache(void);
PyAPI_FUNC(void) PyType_Modified(PyTypeObject *);
......
......@@ -36,6 +36,13 @@
#define HAVE_EXPM1 1
#define Py_USING_UNICODE 1
#define Py_UNICODE_SIZE 4
#define HAVE_UINT32_T 1
#define HAVE_UINT64_T 1
#define HAVE_UINTPTR_T 1
#define HAVE_INT32_T 1
#define HAVE_INT64_T 1
#define DOUBLE_IS_LITTLE_ENDIAN_IEEE754 1
#define WITH_THREAD
......
......@@ -15,6 +15,96 @@ typedef ssize_t Py_ssize_t;
// Pyston change: the rest of these have just been copied from CPython's pyport.h, in an arbitrary order:
/* Py_LOCAL(type) and Py_LOCAL_INLINE(type) expand to the declaration prefix
 * for a file-local (static) function returning `type`.  One of three
 * variants is selected at preprocessing time:
 *   - _MSC_VER:   static + __fastcall (and __inline for the INLINE form)
 *   - USE_INLINE: static, with `inline` on the INLINE form
 *   - otherwise:  plain static for both forms
 */
#if defined(_MSC_VER)
#if defined(PY_LOCAL_AGGRESSIVE)
/* enable more aggressive optimization for visual studio */
#pragma optimize("agtw", on)
#endif
/* ignore warnings if the compiler decides not to inline a function */
#pragma warning(disable: 4710)
/* fastest possible local call under MSVC */
#define Py_LOCAL(type) static type __fastcall
#define Py_LOCAL_INLINE(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define Py_LOCAL(type) static type
#define Py_LOCAL_INLINE(type) static inline type
#else
/* fallback: Py_LOCAL_INLINE carries no inline keyword here */
#define Py_LOCAL(type) static type
#define Py_LOCAL_INLINE(type) static type
#endif
/* The functions _Py_dg_strtod and _Py_dg_dtoa in Python/dtoa.c (which are
* required to support the short float repr introduced in Python 3.1) require
* that the floating-point unit that's being used for arithmetic operations
* on C doubles is set to use 53-bit precision. It also requires that the
* FPU rounding mode is round-half-to-even, but that's less often an issue.
*
* If your FPU isn't already set to 53-bit precision/round-half-to-even, and
* you want to make use of _Py_dg_strtod and _Py_dg_dtoa, then you should
*
* #define HAVE_PY_SET_53BIT_PRECISION 1
*
* and also give appropriate definitions for the following three macros:
*
* _Py_SET_53BIT_PRECISION_START : store original FPU settings, and
* set FPU to 53-bit precision/round-half-to-even
* _Py_SET_53BIT_PRECISION_END : restore original FPU settings
* _Py_SET_53BIT_PRECISION_HEADER : any variable declarations needed to
* use the two macros above.
*
* The macros are designed to be used within a single C function: see
* Python/pystrtod.c for an example of their use.
*/
/* get and set x87 control word for gcc/x86 */
#ifdef HAVE_GCC_ASM_FOR_X87
#define HAVE_PY_SET_53BIT_PRECISION 1
/* _Py_get/set_387controlword functions are defined in Python/pymath.c */
/* Declares the two locals shared by the START and END macros below; expand
   it once among the declarations of the enclosing function. */
#define _Py_SET_53BIT_PRECISION_HEADER \
    unsigned short old_387controlword, new_387controlword
/* Saves the current x87 control word, derives a copy whose 0x0f00 field is
   replaced by 0x0200 (the 53-bit precision / round-half-to-even setting
   described above), and installs it only when it differs from the current
   value. */
#define _Py_SET_53BIT_PRECISION_START \
    do { \
        old_387controlword = _Py_get_387controlword(); \
        new_387controlword = (old_387controlword & ~0x0f00) | 0x0200; \
        if (new_387controlword != old_387controlword) \
            _Py_set_387controlword(new_387controlword); \
    } while (0)
/* Restores the saved control word if START changed it.  Wrapped in
   do { } while (0) so the expansion is one statement: a bare `if` here
   would capture an `else` that follows the macro in the caller's code. */
#define _Py_SET_53BIT_PRECISION_END \
    do { \
        if (new_387controlword != old_387controlword) \
            _Py_set_387controlword(old_387controlword); \
    } while (0)
#endif
/* get and set x87 control word for VisualStudio/x86 */
/* MSVC / 32-bit variant of the three 53-bit-precision macros.  It follows
   the same protocol as the gcc variant: HEADER declares the locals, START
   saves and (if needed) changes the control word, END restores it. */
#if defined(_MSC_VER) && !defined(_WIN64) /* x87 not supported in 64-bit */
#define HAVE_PY_SET_53BIT_PRECISION 1
/* out_387controlword only receives the value written back by __control87_2;
   the macros never read it. */
#define _Py_SET_53BIT_PRECISION_HEADER \
unsigned int old_387controlword, new_387controlword, out_387controlword
/* We use the __control87_2 function to set only the x87 control word.
The SSE control word is unaffected. */
#define _Py_SET_53BIT_PRECISION_START \
do { \
__control87_2(0, 0, &old_387controlword, NULL); \
new_387controlword = \
(old_387controlword & ~(_MCW_PC | _MCW_RC)) | (_PC_53 | _RC_NEAR); \
if (new_387controlword != old_387controlword) \
__control87_2(new_387controlword, _MCW_PC | _MCW_RC, \
&out_387controlword, NULL); \
} while (0)
/* Restore only if START actually changed the control word. */
#define _Py_SET_53BIT_PRECISION_END \
do { \
if (new_387controlword != old_387controlword) \
__control87_2(old_387controlword, _MCW_PC | _MCW_RC, \
&out_387controlword, NULL); \
} while (0)
#endif
/* default definitions are empty: when neither platform block above defined
   HAVE_PY_SET_53BIT_PRECISION, all three macros expand to nothing, so code
   that uses them compiles unchanged and leaves the FPU settings alone. */
#ifndef HAVE_PY_SET_53BIT_PRECISION
#define _Py_SET_53BIT_PRECISION_HEADER
#define _Py_SET_53BIT_PRECISION_START
#define _Py_SET_53BIT_PRECISION_END
#endif
/* Py_DEPRECATED(version)
* Declare a variable, type, or function deprecated.
* Usage:
......@@ -137,5 +227,50 @@ typedef ssize_t Py_ssize_t;
/* Smallest negative value of type Py_ssize_t. */
#define PY_SSIZE_T_MIN (-PY_SSIZE_T_MAX-1)
/* Fixed-width integer aliases.  For each of uint32_t, uint64_t, int32_t and
 * int64_t the same two-step pattern is used:
 *   1. if the type name is itself a preprocessor macro (#ifdef only sees
 *      macros, not typedefs), define the matching HAVE_*_T flag;
 *   2. if HAVE_*_T is defined (here or by the configuration header) and the
 *      PY_*_T alias has not been defined yet, define the alias as that type.
 */
#ifdef uint32_t
#define HAVE_UINT32_T 1
#endif
#ifdef HAVE_UINT32_T
#ifndef PY_UINT32_T
#define PY_UINT32_T uint32_t
#endif
#endif
/* Macros for a 64-bit unsigned integer type; used for type 'twodigits' in the
* long integer implementation, when 30-bit digits are enabled.
*/
#ifdef uint64_t
#define HAVE_UINT64_T 1
#endif
#ifdef HAVE_UINT64_T
#ifndef PY_UINT64_T
#define PY_UINT64_T uint64_t
#endif
#endif
/* Signed variants of the above */
#ifdef int32_t
#define HAVE_INT32_T 1
#endif
#ifdef HAVE_INT32_T
#ifndef PY_INT32_T
#define PY_INT32_T int32_t
#endif
#endif
#ifdef int64_t
#define HAVE_INT64_T 1
#endif
#ifdef HAVE_INT64_T
#ifndef PY_INT64_T
#define PY_INT64_T int64_t
#endif
#endif
#endif /* Py_PYPORT_H */
// This file is originally from CPython 2.7, with modifications for Pyston
//
#ifndef Py_STRTOD_H
#define Py_STRTOD_H
#ifdef __cplusplus
extern "C" {
#endif
PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr);
PyAPI_FUNC(double) PyOS_ascii_atof(const char *str);
/* Deprecated in 2.7 and 3.1. Will disappear in 2.8 (if it exists) and 3.2 */
PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len,
const char *format, double d);
PyAPI_FUNC(double) PyOS_string_to_double(const char *str,
char **endptr,
PyObject *overflow_exception);
/* The caller is responsible for calling PyMem_Free to free the buffer
that is returned. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
char format_code,
int precision,
int flags,
int *type);
PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr);
/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */
#define Py_DTSF_SIGN 0x01 /* always add the sign */
#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */
#define Py_DTSF_ALT 0x04 /* "alternate" formatting. it's format_code
specific */
/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */
#define Py_DTST_FINITE 0
#define Py_DTST_INFINITE 1
#define Py_DTST_NAN 2
#ifdef __cplusplus
}
#endif
#endif /* !Py_STRTOD_H */
......@@ -58,8 +58,9 @@ typedef struct _PyStringObject PyStringObject;
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
PyAPI_DATA(PyTypeObject) PyBaseString_Type;
// Pyston change: this is no longer a static object
// Pyston change: these are no longer static objects
PyAPI_DATA(PyTypeObject*) basestring_cls;
#define PyBaseString_Type (*basestring_cls)
PyAPI_DATA(PyTypeObject*) str_cls;
#define PyString_Type (*str_cls)
......@@ -101,8 +102,8 @@ PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void);
/* Macro, trading safety for speed */
// Pyston changes: these aren't direct macros any more [they potentially could be though]
#define PyString_AS_STRING(op) PyString_AsString(op)
#define PyString_GET_SIZE(op) PyString_Size(op)
/* Forward to the function forms after casting the argument to PyObject*.
   The argument is parenthesized so that an expression argument is cast as a
   whole rather than only its first operand. */
#define PyString_AS_STRING(op) PyString_AsString((PyObject*)(op))
#define PyString_GET_SIZE(op) PyString_Size((PyObject*)(op))
//#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval)
//#define PyString_GET_SIZE(op) Py_SIZE(op)
......
bits shared by the stringobject and unicodeobject implementations (and
possibly other modules, in a not too distant future).
the stuff in here is included into relevant places; see the individual
source files for details.
--------------------------------------------------------------------
the following defines used by the different modules:
STRINGLIB_CHAR
the type used to hold a character (char or Py_UNICODE)
STRINGLIB_EMPTY
a PyObject representing the empty string, only to be used if
STRINGLIB_MUTABLE is 0
Py_ssize_t STRINGLIB_LEN(PyObject*)
returns the length of the given string object (which must be of the
right type)
PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t)
creates a new string object
STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
returns the pointer to the character data for the given string
object (which must be of the right type)
int STRINGLIB_CHECK_EXACT(PyObject *)
returns true if the object is an instance of our type, not a subclass
STRINGLIB_MUTABLE
must be 0 or 1 to tell the cpp macros in stringlib code if the object
being operated on is mutable or not
/* stringlib: count implementation */
#ifndef STRINGLIB_COUNT_H
#define STRINGLIB_COUNT_H
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
/* Count occurrences of sub (sub_len characters) in str (str_len characters),
   stopping at maxcount.
   - A negative str_len yields 0.
   - An empty sub yields str_len + 1, capped at maxcount.
   - Otherwise the result of fastsearch() in FAST_COUNT mode is returned,
     with a negative result reported as 0. */
Py_LOCAL_INLINE(Py_ssize_t)
stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                Py_ssize_t maxcount)
{
    Py_ssize_t hits;

    if (str_len < 0) {
        /* start > len(str) */
        return 0;
    }
    if (sub_len == 0) {
        if (str_len < maxcount)
            return str_len + 1;
        return maxcount;
    }

    hits = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
    /* a negative value from fastsearch means "no match" */
    return (hits < 0) ? 0 : hits;
}
#endif
/* NOTE: this API is -ONLY- for use with single byte character strings. */
/* Do not use it with Unicode. */
#include "bytes_methods.h"
/* Thin wrapper: passes self's character buffer and length to
   _Py_bytes_isspace and returns whatever it returns. */
static PyObject*
stringlib_isspace(PyObject *self)
{
    PyObject *verdict;

    verdict = _Py_bytes_isspace(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    return verdict;
}
/* Thin wrapper: passes self's character buffer and length to
   _Py_bytes_isalpha and returns whatever it returns. */
static PyObject*
stringlib_isalpha(PyObject *self)
{
    PyObject *verdict;

    verdict = _Py_bytes_isalpha(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    return verdict;
}
/* Thin wrapper: passes self's character buffer and length to
   _Py_bytes_isalnum and returns whatever it returns. */
static PyObject*
stringlib_isalnum(PyObject *self)
{
    PyObject *verdict;

    verdict = _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    return verdict;
}
/* Thin wrapper: passes self's character buffer and length to
   _Py_bytes_isdigit and returns whatever it returns. */
static PyObject*
stringlib_isdigit(PyObject *self)
{
    PyObject *verdict;

    verdict = _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    return verdict;
}
/* Thin wrapper: passes self's character buffer and length to
   _Py_bytes_islower and returns whatever it returns. */
static PyObject*
stringlib_islower(PyObject *self)
{
    PyObject *verdict;

    verdict = _Py_bytes_islower(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    return verdict;
}
/* Thin wrapper: passes self's character buffer and length to
   _Py_bytes_isupper and returns whatever it returns. */
static PyObject*
stringlib_isupper(PyObject *self)
{
    PyObject *verdict;

    verdict = _Py_bytes_isupper(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    return verdict;
}
/* Thin wrapper: passes self's character buffer and length to
   _Py_bytes_istitle and returns whatever it returns. */
static PyObject*
stringlib_istitle(PyObject *self)
{
    PyObject *verdict;

    verdict = _Py_bytes_istitle(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    return verdict;
}
/* functions that return a new object partially translated by ctype funcs: */
/* Allocate a new object of the same length as self via STRINGLIB_NEW and
   let _Py_bytes_lower fill its buffer from self's buffer.
   Returns the new object, or NULL if the allocation failed. */
static PyObject*
stringlib_lower(PyObject *self)
{
    PyObject *result = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));

    if (result != NULL) {
        _Py_bytes_lower(STRINGLIB_STR(result), STRINGLIB_STR(self),
                        STRINGLIB_LEN(self));
    }
    return result;
}
/* Allocate a new object of the same length as self via STRINGLIB_NEW and
   let _Py_bytes_upper fill its buffer from self's buffer.
   Returns the new object, or NULL if the allocation failed. */
static PyObject*
stringlib_upper(PyObject *self)
{
    PyObject *result = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));

    if (result != NULL) {
        _Py_bytes_upper(STRINGLIB_STR(result), STRINGLIB_STR(self),
                        STRINGLIB_LEN(self));
    }
    return result;
}
/* Allocate a new object of the same length as self via STRINGLIB_NEW and
   let _Py_bytes_title fill its buffer from self's buffer.
   Returns the new object, or NULL if the allocation failed. */
static PyObject*
stringlib_title(PyObject *self)
{
    PyObject *result = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));

    if (result != NULL) {
        _Py_bytes_title(STRINGLIB_STR(result), STRINGLIB_STR(self),
                        STRINGLIB_LEN(self));
    }
    return result;
}
/* Allocate a new object of the same length as self via STRINGLIB_NEW and
   let _Py_bytes_capitalize fill its buffer from self's buffer.
   Returns the new object, or NULL if the allocation failed. */
static PyObject*
stringlib_capitalize(PyObject *self)
{
    PyObject *result = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));

    if (result != NULL) {
        _Py_bytes_capitalize(STRINGLIB_STR(result), STRINGLIB_STR(self),
                             STRINGLIB_LEN(self));
    }
    return result;
}
/* Allocate a new object of the same length as self via STRINGLIB_NEW and
   let _Py_bytes_swapcase fill its buffer from self's buffer.
   Returns the new object, or NULL if the allocation failed. */
static PyObject*
stringlib_swapcase(PyObject *self)
{
    PyObject *result = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));

    if (result != NULL) {
        _Py_bytes_swapcase(STRINGLIB_STR(result), STRINGLIB_STR(self),
                           STRINGLIB_LEN(self));
    }
    return result;
}
/* stringlib: fastsearch implementation */
#ifndef STRINGLIB_FASTSEARCH_H
#define STRINGLIB_FASTSEARCH_H
/* fast search/count implementation, based on a mix between boyer-
moore and horspool, with a few more bells and whistles on the top.
for some more background, see: http://effbot.org/zone/stringlib.htm */
/* note: fastsearch may access s[n], which isn't a problem when using
Python's ordinary string types, but may cause problems if you're
using this code in other contexts. also, the count mode returns -1
if there cannot possibly be a match in the target string, and 0 if
it has actually checked for matches, but didn't find any. callers
beware! */
/* Values for the `mode` argument of fastsearch(). */
#define FAST_COUNT 0
#define FAST_SEARCH 1
#define FAST_RSEARCH 2
/* Number of bits used by the bloom mask: the largest of 128/64/32 that
   LONG_BIT can hold.  The mask itself is an unsigned long. */
#if LONG_BIT >= 128
#define STRINGLIB_BLOOM_WIDTH 128
#elif LONG_BIT >= 64
#define STRINGLIB_BLOOM_WIDTH 64
#elif LONG_BIT >= 32
#define STRINGLIB_BLOOM_WIDTH 32
#else
#error "LONG_BIT is smaller than 32"
#endif
/* Set the mask bit selected by the low bits of ch (ch modulo the width).
   Modifies `mask` in place. */
#define STRINGLIB_BLOOM_ADD(mask, ch) \
((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
/* Non-zero if the bit selected by ch is set in mask.  Characters that agree
   in their low bits share a bit, so a non-zero result does not prove that ch
   itself was added; a zero result proves that it was not. */
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
/* Search for pattern p (m characters) inside s (n characters).
 *
 * mode selects the operation:
 *   FAST_COUNT   - count matches, stopping once maxcount is reached
 *   FAST_SEARCH  - return the index of the first match found scanning forward
 *   FAST_RSEARCH - return the index of the first match found scanning backward
 *
 * Return value:
 *   - -1 if the pattern is longer than the text, if m <= 0, or if
 *     mode == FAST_COUNT and maxcount == 0;
 *   - in FAST_SEARCH / FAST_RSEARCH mode, the match index, or -1 if none;
 *   - in FAST_COUNT mode, the number of matches counted (possibly 0),
 *     or maxcount as soon as that many have been seen.
 *
 * The forward scan reads s[i+m], which is s[n] when i == n - m.
 */
Py_LOCAL_INLINE(Py_ssize_t)
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
unsigned long mask;
Py_ssize_t skip, count = 0;
Py_ssize_t i, j, mlast, w;
/* w is the last index at which a match could start */
w = n - m;
if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
return -1;
/* look for special cases */
if (m <= 1) {
if (m <= 0)
return -1;
/* use special case for 1-character strings */
if (mode == FAST_COUNT) {
for (i = 0; i < n; i++)
if (s[i] == p[0]) {
count++;
if (count == maxcount)
return maxcount;
}
return count;
} else if (mode == FAST_SEARCH) {
for (i = 0; i < n; i++)
if (s[i] == p[0])
return i;
} else { /* FAST_RSEARCH */
for (i = n - 1; i > -1; i--)
if (s[i] == p[0])
return i;
}
return -1;
}
/* general case (m >= 2): mlast indexes the last pattern character; skip
   starts at its largest value and is reduced while building the mask */
mlast = m - 1;
skip = mlast - 1;
mask = 0;
if (mode != FAST_RSEARCH) {
/* create compressed boyer-moore delta 1 table */
/* process pattern[:-1] */
for (i = 0; i < mlast; i++) {
STRINGLIB_BLOOM_ADD(mask, p[i]);
if (p[i] == p[mlast])
skip = mlast - i - 1;
}
/* process pattern[-1] outside the loop */
STRINGLIB_BLOOM_ADD(mask, p[mlast]);
/* every adjustment to i below is in addition to the loop's own i++ */
for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */
if (s[i+m-1] == p[m-1]) {
/* candidate match */
for (j = 0; j < mlast; j++)
if (s[i+j] != p[j])
break;
if (j == mlast) {
/* got a match! */
if (mode != FAST_COUNT)
return i;
count++;
if (count == maxcount)
return maxcount;
/* resume after the match: i + mlast, then the loop's i++ */
i = i + mlast;
continue;
}
/* miss: check if next character is part of pattern */
if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m;
else
i = i + skip;
} else {
/* skip: check if next character is part of pattern */
if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m;
}
}
} else { /* FAST_RSEARCH */
/* create compressed boyer-moore delta 1 table */
/* process pattern[0] outside the loop */
STRINGLIB_BLOOM_ADD(mask, p[0]);
/* process pattern[:0:-1] */
for (i = mlast; i > 0; i--) {
STRINGLIB_BLOOM_ADD(mask, p[i]);
if (p[i] == p[0])
skip = i - 1;
}
/* every adjustment to i below is in addition to the loop's own i-- */
for (i = w; i >= 0; i--) {
if (s[i] == p[0]) {
/* candidate match */
for (j = mlast; j > 0; j--)
if (s[i+j] != p[j])
break;
if (j == 0)
/* got a match! */
return i;
/* miss: check if previous character is part of pattern */
if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
i = i - m;
else
i = i - skip;
} else {
/* skip: check if previous character is part of pattern */
if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
i = i - m;
}
}
}
/* scan finished without an early return: no match for the search modes,
   or the accumulated count for FAST_COUNT */
if (mode != FAST_COUNT)
return -1;
return count;
}
#endif
/* stringlib: find/index implementation */
#ifndef STRINGLIB_FIND_H
#define STRINGLIB_FIND_H
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
/* Forward search for sub in str.
   Returns -1 if str_len is negative or nothing is found, `offset` if sub is
   empty, and otherwise the match position reported by fastsearch() plus
   `offset`. */
Py_LOCAL_INLINE(Py_ssize_t)
stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
               const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
               Py_ssize_t offset)
{
    Py_ssize_t where;

    if (str_len < 0)
        return -1;
    if (sub_len == 0)
        return offset;

    where = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
    /* negative results are passed through untouched */
    return (where < 0) ? where : where + offset;
}
/* Backward search for sub in str.
   Returns -1 if str_len is negative or nothing is found, str_len + offset
   if sub is empty, and otherwise the match position reported by
   fastsearch() in FAST_RSEARCH mode plus `offset`. */
Py_LOCAL_INLINE(Py_ssize_t)
stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                Py_ssize_t offset)
{
    Py_ssize_t where;

    if (str_len < 0)
        return -1;
    if (sub_len == 0)
        return str_len + offset;

    where = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
    /* negative results are passed through untouched */
    return (where < 0) ? where : where + offset;
}
/* helper macro to fixup start/end slice values
 *
 * `start` and `end` must be modifiable lvalues; they are updated in place:
 *   - end greater than len is clamped to len;
 *   - a negative end has len added and is then clamped to 0;
 *   - a negative start has len added and is then clamped to 0.
 * A start larger than len is left unchanged.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (e.g. under an unbraced `if`); use it with a trailing semicolon.
 * Arguments are parenthesized so expression arguments expand safely. */
#define ADJUST_INDICES(start, end, len) \
    do { \
        if ((end) > (len)) \
            (end) = (len); \
        else if ((end) < 0) { \
            (end) += (len); \
            if ((end) < 0) \
                (end) = 0; \
        } \
        if ((start) < 0) { \
            (start) += (len); \
            if ((start) < 0) \
                (start) = 0; \
        } \
    } while (0)
/* Forward search restricted to str[start:end].  start/end are first
   normalized with ADJUST_INDICES; the adjusted start is passed on as the
   offset, so a hit is reported relative to the beginning of str. */
Py_LOCAL_INLINE(Py_ssize_t)
stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                     Py_ssize_t start, Py_ssize_t end)
{
    const STRINGLIB_CHAR *window;
    Py_ssize_t window_len;

    ADJUST_INDICES(start, end, str_len);
    window = str + start;
    window_len = end - start;
    return stringlib_find(window, window_len, sub, sub_len, start);
}
/* Backward search restricted to str[start:end].  start/end are first
   normalized with ADJUST_INDICES; the adjusted start is passed on as the
   offset, so a hit is reported relative to the beginning of str. */
Py_LOCAL_INLINE(Py_ssize_t)
stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                      const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                      Py_ssize_t start, Py_ssize_t end)
{
    const STRINGLIB_CHAR *window;
    Py_ssize_t window_len;

    ADJUST_INDICES(start, end, str_len);
    window = str + start;
    window_len = end - start;
    return stringlib_rfind(window, window_len, sub, sub_len, start);
}
#ifdef STRINGLIB_WANT_CONTAINS_OBJ
/* Returns 1 if stringlib_find() locates sub's characters inside str's
   characters (i.e. its result is not -1), otherwise 0.  Both objects are
   accessed through STRINGLIB_STR / STRINGLIB_LEN. */
Py_LOCAL_INLINE(int)
stringlib_contains_obj(PyObject* str, PyObject* sub)
{
    Py_ssize_t found;

    found = stringlib_find(STRINGLIB_STR(str), STRINGLIB_LEN(str),
                           STRINGLIB_STR(sub), STRINGLIB_LEN(sub),
                           0);
    return found != -1;
}
#endif /* STRINGLIB_WANT_CONTAINS_OBJ */
/*
Shared argument parser for the "find" family (find, rfind, index, rindex)
and for count, startswith and endswith, which all accept the same
arguments: a required object plus optional start and end.

The PyArg_ParseTuple format is "O|OO:" followed by function_name, truncated
to fit FORMAT_BUFFER_SIZE.  A start or end of None is treated as if it had
not been passed (start stays 0, end stays PY_SSIZE_T_MAX); any other value
is converted with _PyEval_SliceIndex.

Returns 1 on success and 0 on failure.  The output parameters are written
only after every step has succeeded.
*/
#define FORMAT_BUFFER_SIZE 50
Py_LOCAL_INLINE(int)
stringlib_parse_args_finds(const char * function_name, PyObject *args,
                           PyObject **subobj,
                           Py_ssize_t *start, Py_ssize_t *end)
{
    char format[FORMAT_BUFFER_SIZE] = "O|OO:";
    size_t prefix_len = strlen(format);
    PyObject *parsed_sub;
    PyObject *start_arg = Py_None;
    PyObject *end_arg = Py_None;
    Py_ssize_t lo = 0;
    Py_ssize_t hi = PY_SSIZE_T_MAX;

    /* append the function name, always leaving room for the terminator */
    strncpy(format + prefix_len, function_name,
            FORMAT_BUFFER_SIZE - prefix_len - 1);
    format[FORMAT_BUFFER_SIZE - 1] = '\0';

    if (!PyArg_ParseTuple(args, format, &parsed_sub, &start_arg, &end_arg))
        return 0;

    /* None means "not given": keep the default bound */
    if (start_arg != Py_None && !_PyEval_SliceIndex(start_arg, &lo))
        return 0;
    if (end_arg != Py_None && !_PyEval_SliceIndex(end_arg, &hi))
        return 0;

    *start = lo;
    *end = hi;
    *subobj = parsed_sub;
    return 1;
}
#undef FORMAT_BUFFER_SIZE
#if STRINGLIB_IS_UNICODE
/*
Unicode flavour of stringlib_parse_args_finds(): after the common parsing
succeeds, the substring argument is passed through PyUnicode_FromObject()
and the result is stored in *substring.

The object stored in *substring is not DECREF'ed here; it stays alive for
the caller, which must DECREF it once it is done with the substring.

Returns 1 on success, 0 if parsing or the conversion failed (in which case
*substring is not written).
*/
Py_LOCAL_INLINE(int)
stringlib_parse_args_finds_unicode(const char * function_name, PyObject *args,
                                   PyUnicodeObject **substring,
                                   Py_ssize_t *start, Py_ssize_t *end)
{
    PyObject *raw_sub;
    PyObject *as_unicode;

    if (!stringlib_parse_args_finds(function_name, args, &raw_sub,
                                    start, end))
        return 0;

    as_unicode = PyUnicode_FromObject(raw_sub);
    if (as_unicode == NULL)
        return 0;

    *substring = (PyUnicodeObject *)as_unicode;
    return 1;
}
#endif /* STRINGLIB_IS_UNICODE */
#endif /* STRINGLIB_FIND_H */
// This file is originally from CPython 2.7, with modifications for Pyston
/* implements the string, long, and float formatters. that is,
string.__format__, etc. */
#include <locale.h>
/* Before including this, you must include either:
stringlib/unicodedefs.h
stringlib/stringdefs.h
Also, you should define the names:
FORMAT_STRING
FORMAT_LONG
FORMAT_FLOAT
FORMAT_COMPLEX
to be whatever you want the public names of these functions to
be. These are the only non-static functions defined here.
*/
/* Raises an exception about an unknown presentation type for this
* type.
*
* Sets a ValueError naming the offending format code and type_name.
* presentation_type is the format code character; type_name is inserted
* into the message (at most 200 characters of it, per the %.200s).
*
* In the non-unicode build only the '%c' message is compiled.  In the
* unicode build the '%c' form is used for codes strictly between 32 and 128
* and a hexadecimal '\x..' form for everything else. */
static void
unknown_presentation_type(STRINGLIB_CHAR presentation_type,
const char* type_name)
{
#if STRINGLIB_IS_UNICODE
/* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
hence the two cases. If it is char, gcc complains that the
condition below is always true, hence the ifdef. */
if (presentation_type > 32 && presentation_type < 128)
#endif
/* body of the `if` above in the unicode build; unconditional otherwise */
PyErr_Format(PyExc_ValueError,
"Unknown format code '%c' "
"for object of type '%.200s'",
(char)presentation_type,
type_name);
#if STRINGLIB_IS_UNICODE
else
PyErr_Format(PyExc_ValueError,
"Unknown format code '\\x%x' "
"for object of type '%.200s'",
(unsigned int)presentation_type,
type_name);
#endif
}
/* Sets a ValueError stating that ',' cannot be combined with the given
* presentation type.  As in unknown_presentation_type, the unicode build
* prints the code with '%c' only when it lies strictly between 32 and 128
* and falls back to a hexadecimal '\x..' form otherwise; the non-unicode
* build always uses '%c'. */
static void
invalid_comma_type(STRINGLIB_CHAR presentation_type)
{
#if STRINGLIB_IS_UNICODE
/* See comment in unknown_presentation_type */
if (presentation_type > 32 && presentation_type < 128)
#endif
/* body of the `if` above in the unicode build; unconditional otherwise */
PyErr_Format(PyExc_ValueError,
"Cannot specify ',' with '%c'.",
(char)presentation_type);
#if STRINGLIB_IS_UNICODE
else
PyErr_Format(PyExc_ValueError,
"Cannot specify ',' with '\\x%x'.",
(unsigned int)presentation_type);
#endif
}
/*
    get_integer reads a run of zero or more decimal digits starting at
    *ptr and stopping at `end` or at the first character for which
    STRINGLIB_TODECIMAL reports a negative value.

    On success *ptr is left just past the digits, *result receives the
    accumulated non-negative value (0 when no digit was read) and the
    number of digits consumed is returned.

    If the value would exceed PY_SSIZE_T_MAX a ValueError is set and -1 is
    returned; *result is not written in that case.
*/
static int
get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
            Py_ssize_t *result)
{
    Py_ssize_t value = 0;
    int ndigits = 0;

    while (*ptr < end) {
        Py_ssize_t digit = STRINGLIB_TODECIMAL(**ptr);

        if (digit < 0)
            break;
        /*
           Overflow guard, evaluated before the multiplication:
           value * 10 + digit > PY_SSIZE_T_MAX  if and only if
           value > (PY_SSIZE_T_MAX - digit) / 10.
        */
        if (value > (PY_SSIZE_T_MAX - digit) / 10) {
            PyErr_Format(PyExc_ValueError,
                         "Too many decimal digits in format string");
            return -1;
        }
        value = value * 10 + digit;
        ++(*ptr);
        ++ndigits;
    }

    *result = value;
    return ndigits;
}
/************************************************************************/
/*********** standard format specifier parsing **************************/
/************************************************************************/
/* Returns 1 when c is one of the alignment characters '<', '>', '=' or '^',
   and 0 for any other character. */
Py_LOCAL_INLINE(int)
is_alignment_token(STRINGLIB_CHAR c)
{
    return c == '<' || c == '>' || c == '=' || c == '^';
}
/* Returns 1 when c is one of the sign characters ' ', '+' or '-',
   and 0 for any other character. */
Py_LOCAL_INLINE(int)
is_sign_element(STRINGLIB_CHAR c)
{
    return c == ' ' || c == '+' || c == '-';
}
/* Parsed form of a format specifier, filled in by
   parse_internal_render_format_spec(). */
typedef struct {
STRINGLIB_CHAR fill_char; /* padding character; ' ' unless specified, '0' for the 0-padding shortcut */
STRINGLIB_CHAR align; /* alignment token ('<', '>', '=', '^') or the caller's default */
int alternate; /* 1 if '#' was present, else 0 */
STRINGLIB_CHAR sign; /* ' ', '+' or '-'; '\0' if no sign option was given */
Py_ssize_t width; /* minimum field width; -1 if not specified */
int thousands_separators; /* 1 if ',' was present, else 0 */
Py_ssize_t precision; /* digits after '.'; -1 if not specified */
STRINGLIB_CHAR type; /* presentation type character; starts as the caller's default */
} InternalFormatSpec;
#if 0
/* Occasionally useful for debugging. Should normally be commented out.
   Prints every field of *format to stdout, one per line, followed by a
   blank line.  Compiled out by the surrounding #if 0. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
printf("internal format spec: fill_char %d\n", format->fill_char);
printf("internal format spec: align %d\n", format->align);
printf("internal format spec: alternate %d\n", format->alternate);
printf("internal format spec: sign %d\n", format->sign);
printf("internal format spec: width %zd\n", format->width);
printf("internal format spec: thousands_separators %d\n",
format->thousands_separators);
printf("internal format spec: precision %zd\n", format->precision);
printf("internal format spec: type %c\n", format->type);
printf("\n");
}
#endif
/*
Parse a standard format specifier.  The pieces are consumed in this
order, each of them optional:
[[fill]align] [sign] [#] [0] [width] [,] [.precision] [type]
format_spec points to the start of the specifier and format_spec_len is
its length in characters.
fills in format with the parsed information; pieces that are absent
keep the defaults assigned at the top of the function (default_type and
default_align are supplied by the caller).
returns 1 on success, 0 on failure.
if failure, sets the exception
*/
static int
parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
Py_ssize_t format_spec_len,
InternalFormatSpec *format,
char default_type,
char default_align)
{
STRINGLIB_CHAR *ptr = format_spec;
STRINGLIB_CHAR *end = format_spec + format_spec_len;
/* end-ptr is used throughout this code to specify the length of
the input string */
Py_ssize_t consumed;
int align_specified = 0;
int fill_char_specified = 0;
/* Start from the defaults.  -1 in width/precision and '\0' in sign
record "not specified". */
format->fill_char = ' ';
format->align = default_align;
format->alternate = 0;
format->sign = '\0';
format->width = -1;
format->thousands_separators = 0;
format->precision = -1;
format->type = default_type;
/* If the second char is an alignment token,
then parse the fill char */
if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
format->align = ptr[1];
format->fill_char = ptr[0];
fill_char_specified = 1;
align_specified = 1;
ptr += 2;
}
/* Otherwise a leading alignment token stands alone, with no fill. */
else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
format->align = ptr[0];
align_specified = 1;
++ptr;
}
/* Parse the various sign options */
if (end-ptr >= 1 && is_sign_element(ptr[0])) {
format->sign = ptr[0];
++ptr;
}
/* If the next character is #, we're in alternate mode. This only
applies to integers. */
if (end-ptr >= 1 && ptr[0] == '#') {
format->alternate = 1;
++ptr;
}
/* The special case for 0-padding (backwards compat).  An explicit
fill character takes precedence over it, and an explicit alignment
is kept; only an unspecified alignment becomes '='. */
if (!fill_char_specified && end-ptr >= 1 && ptr[0] == '0') {
format->fill_char = '0';
if (!align_specified) {
format->align = '=';
}
++ptr;
}
consumed = get_integer(&ptr, end, &format->width);
if (consumed == -1)
/* Overflow error. Exception already set. */
return 0;
/* If consumed is 0, we didn't consume any characters for the
width. In that case, reset the width to -1, because
get_integer() will have set it to zero. -1 is how we record
that the width wasn't specified. */
if (consumed == 0)
format->width = -1;
/* Comma signifies add thousands separators */
if (end-ptr && ptr[0] == ',') {
format->thousands_separators = 1;
++ptr;
}
/* Parse field precision */
if (end-ptr && ptr[0] == '.') {
++ptr;
consumed = get_integer(&ptr, end, &format->precision);
if (consumed == -1)
/* Overflow error. Exception already set. */
return 0;
/* Not having a precision after a dot is an error. */
if (consumed == 0) {
PyErr_Format(PyExc_ValueError,
"Format specifier missing precision");
return 0;
}
}
/* Finally, parse the type field. */
if (end-ptr > 1) {
/* More than one char remain, invalid conversion spec. */
PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
return 0;
}
/* Exactly one char left: it is the presentation type.  With none
left, format->type keeps default_type. */
if (end-ptr == 1) {
format->type = ptr[0];
++ptr;
}
/* Do as much validating as we can, just by looking at the format
specifier. Do not take into account what type of formatting
we're doing (int, float, string). */
if (format->thousands_separators) {
switch (format->type) {
case 'd':
case 'e':
case 'f':
case 'g':
case 'E':
case 'G':
case '%':
case 'F':
case '\0':
/* These are allowed. See PEP 378.*/
break;
default:
invalid_comma_type(format->type);
return 0;
}
}
return 1;
}
/* Work out how a field holding `nchars` characters of content is laid
   out when the requested width is `width` (negative means "no width
   requested") and the alignment flag is `align`.

   Outputs:
     *n_total    - size of the whole field; never smaller than nchars
     *n_lpadding - fill characters placed before the content
     *n_rpadding - fill characters placed after the content

   '>' puts all of the spare room on the left, '^' splits it (the left
   side gets the smaller half), and '<' / '=' put it all on the right.
   Any other alignment value is a caller bug: it trips an assert in
   debug builds and is otherwise treated like '<'. */
static void
calc_padding(Py_ssize_t nchars, Py_ssize_t width, STRINGLIB_CHAR align,
             Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
             Py_ssize_t *n_total)
{
    Py_ssize_t field = nchars;
    Py_ssize_t spare;
    Py_ssize_t left = 0;

    /* A requested width only matters when it exceeds the content. */
    if (width >= 0 && width > nchars) {
        field = width;
    }
    spare = field - nchars;

    switch (align) {
    case '>':
        left = spare;
        break;
    case '^':
        left = spare / 2;
        break;
    case '<':
    case '=':
        break;
    default:
        /* We should never have an unspecified alignment. */
        assert(0);
        break;
    }

    *n_total = field;
    *n_lpadding = left;
    *n_rpadding = spare - left;
}
/* Write the fill characters around a content area of `nchars`
   characters inside the buffer starting at `p`: `n_lpadding` copies of
   `fill_char` in front of it and `n_rpadding` copies after it.  The
   content area itself is left untouched.

   Returns a pointer to the start of the content area, where the caller
   is expected to store the content. */
static STRINGLIB_CHAR *
fill_padding(STRINGLIB_CHAR *p, Py_ssize_t nchars, STRINGLIB_CHAR fill_char,
             Py_ssize_t n_lpadding, Py_ssize_t n_rpadding)
{
    STRINGLIB_CHAR *content = p + n_lpadding;

    if (n_lpadding != 0) {
        STRINGLIB_FILL(p, fill_char, n_lpadding);
    }
    if (n_rpadding != 0) {
        STRINGLIB_FILL(content + nchars, fill_char, n_rpadding);
    }
    return content;
}
#if defined FORMAT_FLOAT || defined FORMAT_LONG || defined FORMAT_COMPLEX
/************************************************************************/
/*********** common routines for numeric formatting *********************/
/************************************************************************/
/* Locale type codes. */
#define LT_CURRENT_LOCALE 0
#define LT_DEFAULT_LOCALE 1
#define LT_NO_LOCALE 2
/* Locale info needed for formatting integers and the part of floats
before and including the decimal. Note that locales only support
8-bit chars, not unicode.
Filled in by get_locale_info(); the strings are either string literals,
the file-level no_grouping array, or pointers taken from the struct
returned by localeconv(). */
typedef struct {
/* NUL-terminated decimal point string */
char *decimal_point;
/* NUL-terminated thousands separator string (may be empty) */
char *thousands_sep;
/* grouping description, passed on to STRINGLIB_GROUPING */
char *grouping;
} LocaleInfo;
/* describes the layout for an integer, see the comment in
calc_number_widths() for details.  All members are computed by
calc_number_widths() and consumed by fill_number(). */
typedef struct {
/* fill characters written before the sign */
Py_ssize_t n_lpadding;
/* length of the prefix (e.g. '0x') */
Py_ssize_t n_prefix;
/* fill characters written between the prefix and the digits
('=' alignment) */
Py_ssize_t n_spadding;
/* fill characters written after the remainder */
Py_ssize_t n_rpadding;
/* the sign character to emit; only meaningful when n_sign is 1 */
char sign;
Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
any grouping chars. */
Py_ssize_t n_decimal; /* 0 if only an integer */
Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
excluding the decimal itself, if
present. */
/* These 2 are not the widths of fields, but are needed by
STRINGLIB_GROUPING. */
Py_ssize_t n_digits; /* The number of digits before a decimal
or exponent. */
Py_ssize_t n_min_width; /* The min_width we used when we computed
the n_grouped_digits width. */
} NumberFieldWidths;
/* Given a number of the form:
   digits[remainder]
   where ptr points to the start and len is its length in characters,
   find where the integer part ends. This could be a decimal, an
   exponent, both, or neither.

   On return:
     *has_decimal - 1 if the character right after the leading digits is
                    '.', else 0
     *n_remainder - number of characters following the leading digits,
                    not counting the decimal point itself

   Results are undefined (but shouldn't crash) for improperly
   formatted strings.
*/
static void
parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
             Py_ssize_t *n_remainder, int *has_decimal)
{
    STRINGLIB_CHAR *end = ptr + len;
    STRINGLIB_CHAR *remainder;

    /* Test against the '0'..'9' range directly rather than calling
       isdigit(): STRINGLIB_CHAR may be a signed char or a wide
       character type, and isdigit() is only defined for arguments
       representable as unsigned char (or EOF). */
    while (ptr < end && *ptr >= '0' && *ptr <= '9')
        ++ptr;
    remainder = ptr;

    /* Does remainder start with a decimal point? */
    *has_decimal = remainder < end && *remainder == '.';

    /* Skip the decimal point. */
    if (*has_decimal)
        remainder++;

    *n_remainder = end - remainder;
}
/* Compute the layout of a formatted number and store it in *spec.
spec - output; every member is assigned here
n_prefix - number of prefix characters to emit (e.g. for '0x')
sign_char - '-' if the number is negative; any other value is treated
as non-negative
number - not referenced in the body
n_number - length of the number text (digits, optional decimal point,
remainder), without sign or prefix
n_remainder - characters after the integer digits, not counting the
decimal point
has_decimal - non-zero if a decimal point is present
locale - decimal point, separator and grouping to use
format - parsed format specifier; sign, fill_char, align and width
are read
Returns the total number of characters the formatted number occupies.

not all fields of format are used. for example, precision is
unused. should this take discrete params in order to be more clear
about what it does? or is passing a single format parameter easier
and more efficient enough to justify a little obfuscation? */
static Py_ssize_t
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
Py_ssize_t n_number, Py_ssize_t n_remainder,
int has_decimal, const LocaleInfo *locale,
const InternalFormatSpec *format)
{
Py_ssize_t n_non_digit_non_padding;
Py_ssize_t n_padding;
/* Whatever is neither remainder nor decimal point is integer digits. */
spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
spec->n_lpadding = 0;
spec->n_prefix = n_prefix;
/* The emitted decimal point is the locale's string, which may be
longer than the single '.' in the input. */
spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
spec->n_remainder = n_remainder;
spec->n_spadding = 0;
spec->n_rpadding = 0;
spec->sign = '\0';
spec->n_sign = 0;
/* the output will look like:
| |
| <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
| |
sign is computed from format->sign and the actual
sign of the number
prefix is given (it's for the '0x' prefix)
digits is already known
the total width is either given, or computed from the
actual digits
only one of lpadding, spadding, and rpadding can be non-zero,
and it's calculated from the width and other fields
*/
/* compute the various parts we're going to write */
switch (format->sign) {
case '+':
/* always put a + or - */
spec->n_sign = 1;
spec->sign = (sign_char == '-' ? '-' : '+');
break;
case ' ':
/* a - for negative numbers, a space otherwise */
spec->n_sign = 1;
spec->sign = (sign_char == '-' ? '-' : ' ');
break;
default:
/* Not specified, or the default (-) */
if (sign_char == '-') {
spec->n_sign = 1;
spec->sign = '-';
}
}
/* The number of chars used for non-digits and non-padding. */
n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
spec->n_remainder;
/* min_width can go negative, that's okay. format->width == -1 means
we don't care. */
/* Only '0' fill combined with '=' alignment hands a minimum width to
the grouping code; every other padding is handled further down. */
if (format->fill_char == '0' && format->align == '=')
spec->n_min_width = format->width - n_non_digit_non_padding;
else
spec->n_min_width = 0;
if (spec->n_digits == 0)
/* This case only occurs when using 'c' formatting, we need
to special case it because the grouping code always wants
to have at least one character. */
spec->n_grouped_digits = 0;
else
/* Called with a NULL output buffer, so only the resulting width is
taken from it here; fill_number() makes the matching call that
writes the characters. */
spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
spec->n_digits,
spec->n_min_width,
locale->grouping,
locale->thousands_sep);
/* Given the desired width and the total of digit and non-digit
space we consume, see if we need any padding. format->width can
be negative (meaning no padding), but this code still works in
that case. */
n_padding = format->width -
(n_non_digit_non_padding + spec->n_grouped_digits);
if (n_padding > 0) {
/* Some padding is needed. Determine if it's left, space, or right. */
switch (format->align) {
case '<':
spec->n_rpadding = n_padding;
break;
case '^':
/* left side gets the smaller half when n_padding is odd */
spec->n_lpadding = n_padding / 2;
spec->n_rpadding = n_padding - spec->n_lpadding;
break;
case '=':
spec->n_spadding = n_padding;
break;
case '>':
spec->n_lpadding = n_padding;
break;
default:
/* Shouldn't get here, but treat it as '>' */
spec->n_lpadding = n_padding;
assert(0);
break;
}
}
return spec->n_lpadding + spec->n_sign + spec->n_prefix +
spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
spec->n_remainder + spec->n_rpadding;
}
/* Fill in the digit parts of a number's string representation,
as determined in calc_number_widths().
No error checking, since we know the buffer is the correct size.
buf - destination; written in the order lpadding, sign, prefix,
spadding, grouped digits, decimal point, remainder, rpadding
spec - layout computed by calc_number_widths()
digits - source text: integer digits, then (if spec->n_decimal is
non-zero) one decimal point character, then the remainder
n_digits - not referenced in the body; spec->n_digits is used instead
prefix - source of the spec->n_prefix prefix characters; only read
when spec->n_prefix is non-zero
fill_char - character used for all three padding areas
locale - grouping, separator and decimal point to emit
toupper - if non-zero, the prefix and the grouped digits are passed
through STRINGLIB_TOUPPER after being written */
static void
fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
LocaleInfo *locale, int toupper)
{
/* Used to keep track of digits, decimal, and remainder. */
STRINGLIB_CHAR *p = digits;
#ifndef NDEBUG
Py_ssize_t r;
#endif
if (spec->n_lpadding) {
STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
buf += spec->n_lpadding;
}
if (spec->n_sign == 1) {
*buf++ = spec->sign;
}
if (spec->n_prefix) {
memmove(buf,
prefix,
spec->n_prefix * sizeof(STRINGLIB_CHAR));
if (toupper) {
Py_ssize_t t;
for (t = 0; t < spec->n_prefix; ++t)
buf[t] = STRINGLIB_TOUPPER(buf[t]);
}
buf += spec->n_prefix;
}
if (spec->n_spadding) {
STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
buf += spec->n_spadding;
}
/* Only for type 'c' special case, it has no digits. */
if (spec->n_digits != 0) {
/* Fill the digits with InsertThousandsGrouping. */
/* The return value is only captured (and checked) when asserts are
enabled. */
#ifndef NDEBUG
r =
#endif
STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
spec->n_digits, spec->n_min_width,
locale->grouping, locale->thousands_sep);
#ifndef NDEBUG
assert(r == spec->n_grouped_digits);
#endif
p += spec->n_digits;
}
/* Uppercase the digits in place in the output buffer, never in the
source. */
if (toupper) {
Py_ssize_t t;
for (t = 0; t < spec->n_grouped_digits; ++t)
buf[t] = STRINGLIB_TOUPPER(buf[t]);
}
buf += spec->n_grouped_digits;
if (spec->n_decimal) {
Py_ssize_t t;
/* Emit the locale's decimal point string ... */
for (t = 0; t < spec->n_decimal; ++t)
buf[t] = locale->decimal_point[t];
buf += spec->n_decimal;
/* ... and step over the single decimal point character in the
source. */
p += 1;
}
if (spec->n_remainder) {
memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
buf += spec->n_remainder;
p += spec->n_remainder;
}
if (spec->n_rpadding) {
STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
buf += spec->n_rpadding;
}
}
/* Grouping description used by get_locale_info() for LT_NO_LOCALE.
In the localeconv() grouping convention a CHAR_MAX element means that
no further grouping is performed. */
static char no_grouping[1] = {CHAR_MAX};
/* Find the decimal point character(s?), thousands_separator(s?), and
   grouping description, either for the current locale if type is
   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
   none if LT_NO_LOCALE.

   type        - one of the LT_* locale type codes
   locale_info - output; all three members are always assigned

   Any other value of type is a caller bug: it trips an assert in debug
   builds, and is handled like LT_NO_LOCALE when asserts are compiled
   out, so that callers never read uninitialized pointers from
   *locale_info. */
static void
get_locale_info(int type, LocaleInfo *locale_info)
{
    switch (type) {
    case LT_CURRENT_LOCALE: {
        struct lconv *locale_data = localeconv();
        locale_info->decimal_point = locale_data->decimal_point;
        locale_info->thousands_sep = locale_data->thousands_sep;
        locale_info->grouping = locale_data->grouping;
        break;
    }
    case LT_DEFAULT_LOCALE:
        locale_info->decimal_point = ".";
        locale_info->thousands_sep = ",";
        locale_info->grouping = "\3"; /* Group every 3 characters.  The
                                         (implicit) trailing 0 means repeat
                                         infinitely. */
        break;
    default:
        assert(0);
        /* fall through: use the no-locale settings */
    case LT_NO_LOCALE:
        locale_info->decimal_point = ".";
        locale_info->thousands_sep = "";
        locale_info->grouping = no_grouping;
        break;
    }
}
#endif /* FORMAT_FLOAT || FORMAT_LONG || FORMAT_COMPLEX */
/************************************************************************/
/*********** string formatting ******************************************/
/************************************************************************/
/* Format the string object `value` according to the already parsed
   specifier `format`.

   Sign flags, the alternate form ('#') and '=' alignment are rejected
   with ValueError.  A non-negative precision caps the number of source
   characters copied; width, alignment and fill character decide the
   padding around them.

   Returns a new string object, or NULL with an exception set. */
static PyObject *
format_string_internal(PyObject *value, const InternalFormatSpec *format)
{
    Py_ssize_t n_left;
    Py_ssize_t n_right;
    Py_ssize_t n_out;
    Py_ssize_t n_src = STRINGLIB_LEN(value);
    STRINGLIB_CHAR *dest;
    PyObject *out;

    /* Reject specifier pieces that make no sense for strings. */
    if (format->sign != '\0') {
        PyErr_SetString(PyExc_ValueError,
                        "Sign not allowed in string format specifier");
        return NULL;
    }
    if (format->alternate) {
        PyErr_SetString(PyExc_ValueError,
                        "Alternate form (#) not allowed in string format "
                        "specifier");
        return NULL;
    }
    if (format->align == '=') {
        PyErr_SetString(PyExc_ValueError,
                        "'=' alignment not allowed "
                        "in string format specifier");
        return NULL;
    }

    /* A precision truncates the source to at most that many characters. */
    if (format->precision >= 0 && format->precision <= n_src) {
        n_src = format->precision;
    }

    calc_padding(n_src, format->width, format->align,
                 &n_left, &n_right, &n_out);

    out = STRINGLIB_NEW(NULL, n_out);
    if (out == NULL) {
        return NULL;
    }

    /* Lay down the padding, then drop the content into the gap. */
    dest = fill_padding(STRINGLIB_STR(out), n_src,
                        format->fill_char, n_left, n_right);
    memcpy(dest, STRINGLIB_STR(value), n_src * sizeof(STRINGLIB_CHAR));
    return out;
}
/************************************************************************/
/*********** long formatting ********************************************/
/************************************************************************/
#if defined FORMAT_LONG || defined FORMAT_INT
/* Callback that renders the integer object `value` as a string object in
the given base (format_int_or_long_internal() passes 2, 8, 10 or 16).
Returns the string object, or NULL on failure. */
typedef PyObject*
(*IntOrLongToString)(PyObject *value, int base);
/* Format the int or long object `value` according to the already parsed
specifier `format`.
value - the number to format
format - parsed format specifier; a precision is rejected
tostring - callback used to render `value` in base 2, 8, 10 or 16
Type 'c' emits the single character whose ordinal is `value`; every
other type renders the number through `tostring` and then applies sign,
prefix, grouping and padding.
Returns a new string object, or NULL with an exception set. */
static PyObject *
format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
IntOrLongToString tostring)
{
PyObject *result = NULL;
PyObject *tmp = NULL;
STRINGLIB_CHAR *pnumeric_chars;
STRINGLIB_CHAR numeric_char;
STRINGLIB_CHAR sign_char = '\0';
Py_ssize_t n_digits; /* count of digits need from the computed
string */
Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
produces non-digits */
Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
Py_ssize_t n_total;
STRINGLIB_CHAR *prefix = NULL;
NumberFieldWidths spec;
long x;
/* Locale settings, either from the actual locale or
from a hard-code pseudo-locale */
LocaleInfo locale;
/* no precision allowed on integers */
if (format->precision != -1) {
PyErr_SetString(PyExc_ValueError,
"Precision not allowed in integer format specifier");
goto done;
}
/* special case for character formatting */
if (format->type == 'c') {
/* error to specify a sign */
if (format->sign != '\0') {
PyErr_SetString(PyExc_ValueError,
"Sign not allowed with integer"
" format specifier 'c'");
goto done;
}
/* Error to specify a comma. */
if (format->thousands_separators) {
PyErr_SetString(PyExc_ValueError,
"Thousands separators not allowed with integer"
" format specifier 'c'");
goto done;
}
/* taken from unicodeobject.c formatchar() */
/* Integer input truncated to a character */
/* XXX: won't work for int */
x = PyLong_AsLong(value);
if (x == -1 && PyErr_Occurred())
goto done;
#ifdef Py_UNICODE_WIDE
if (x < 0 || x > 0x10ffff) {
PyErr_SetString(PyExc_OverflowError,
"%c arg not in range(0x110000) "
"(wide Python build)");
goto done;
}
#else
if (x < 0 || x > 0xffff) {
PyErr_SetString(PyExc_OverflowError,
"%c arg not in range(0x10000) "
"(narrow Python build)");
goto done;
}
#endif
/* NOTE(review): the range check above is chosen by Py_UNICODE_WIDE
only.  If STRINGLIB_CHAR is narrower than that range (presumably
the case in the byte-string build), this cast silently truncates
x -- confirm that is intended. */
numeric_char = (STRINGLIB_CHAR)x;
pnumeric_chars = &numeric_char;
n_digits = 1;
/* As a sort-of hack, we tell calc_number_widths that we only
have "remainder" characters. calc_number_widths thinks
these are characters that don't get formatted, only copied
into the output string. We do this for 'c' formatting,
because the characters are likely to be non-digits. */
n_remainder = 1;
}
else {
int base;
int leading_chars_to_skip = 0; /* Number of characters added by
PyNumber_ToBase that we want to
skip over. */
/* Compute the base and how many characters will be added by
PyNumber_ToBase */
switch (format->type) {
case 'b':
base = 2;
leading_chars_to_skip = 2; /* 0b */
break;
case 'o':
base = 8;
leading_chars_to_skip = 2; /* 0o */
break;
case 'x':
case 'X':
base = 16;
leading_chars_to_skip = 2; /* 0x */
break;
default: /* shouldn't be needed, but stops a compiler warning */
case 'd':
case 'n':
base = 10;
break;
}
/* The number of prefix chars is the same as the leading
chars to skip */
if (format->alternate)
n_prefix = leading_chars_to_skip;
/* Do the hard part, converting to a string in a given base */
tmp = tostring(value, base);
if (tmp == NULL)
goto done;
pnumeric_chars = STRINGLIB_STR(tmp);
n_digits = STRINGLIB_LEN(tmp);
prefix = pnumeric_chars;
/* Remember not to modify what pnumeric_chars points to. it
might be interned. Only modify it after we copy it into a
newly allocated output buffer. */
/* Is a sign character present in the output? If so, remember it
and skip it */
if (pnumeric_chars[0] == '-') {
sign_char = pnumeric_chars[0];
/* the prefix starts after the sign */
++prefix;
++leading_chars_to_skip;
}
/* Skip over the leading chars (0x, 0b, etc.) */
n_digits -= leading_chars_to_skip;
pnumeric_chars += leading_chars_to_skip;
}
/* Determine the grouping, separator, and decimal point, if any. */
get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ?
LT_DEFAULT_LOCALE :
LT_NO_LOCALE),
&locale);
/* Calculate how much memory we'll need. */
n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars,
n_digits, n_remainder, 0, &locale, format);
/* Allocate the memory. */
result = STRINGLIB_NEW(NULL, n_total);
if (!result)
goto done;
/* Populate the memory. */
/* Type 'X' is type 'x' with the prefix and digits uppercased while
they are written. */
fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits,
prefix, format->fill_char, &locale, format->type == 'X');
done:
/* tmp is still NULL on the 'c' path and on early errors */
Py_XDECREF(tmp);
return result;
}
#endif /* defined FORMAT_LONG || defined FORMAT_INT */
/************************************************************************/
/*********** float formatting *******************************************/
/************************************************************************/
#ifdef FORMAT_FLOAT
#if STRINGLIB_IS_UNICODE
/* Widen the first `len` chars of `charbuffer` into `buffer`, one
   Py_UNICODE per char.  `buffer` must have room for `len` elements; no
   terminator is written.

   Each char goes through unsigned char first so that, where plain char
   is signed, bytes >= 0x80 map to 0x80..0xFF instead of being
   sign-extended into large Py_UNICODE values. */
static void
strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
{
    Py_ssize_t i;
    for (i = 0; i < len; ++i)
        buffer[i] = (Py_UNICODE)(unsigned char)charbuffer[i];
}
#endif
/* Format the float object `value` according to the already parsed
specifier `format`.
The number is first rendered by PyOS_double_to_string(); sign handling,
grouping, the locale's decimal point and padding are applied afterwards
by calc_number_widths() and fill_number().
Returns a new string object, or NULL with an exception set.
much of this is taken from unicodeobject.c */
static PyObject *
format_float_internal(PyObject *value,
const InternalFormatSpec *format)
{
char *buf = NULL; /* buffer returned from PyOS_double_to_string */
Py_ssize_t n_digits;
Py_ssize_t n_remainder;
Py_ssize_t n_total;
int has_decimal;
double val;
Py_ssize_t precision;
Py_ssize_t default_precision = 6;
STRINGLIB_CHAR type = format->type;
int add_pct = 0;
STRINGLIB_CHAR *p;
NumberFieldWidths spec;
int flags = 0;
PyObject *result = NULL;
STRINGLIB_CHAR sign_char = '\0';
int float_type; /* Used to see if we have a nan, inf, or regular float. */
#if STRINGLIB_IS_UNICODE
Py_UNICODE *unicode_tmp = NULL;
#endif
/* Locale settings, either from the actual locale or
from a hard-code pseudo-locale */
LocaleInfo locale;
if (format->precision > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "precision too big");
goto done;
}
/* The value fits in an int thanks to the check above. */
precision = (int)format->precision;
/* Alternate is not allowed on floats. */
if (format->alternate) {
PyErr_SetString(PyExc_ValueError,
"Alternate form (#) not allowed in float format "
"specifier");
goto done;
}
if (type == '\0') {
/* Omitted type specifier. This is like 'g' but with at least one
digit after the decimal point, and different default precision.*/
type = 'g';
default_precision = PyFloat_STR_PRECISION;
flags |= Py_DTSF_ADD_DOT_0;
}
if (type == 'n')
/* 'n' is the same as 'g', except for the locale used to
format the result. We take care of that later. */
type = 'g';
val = PyFloat_AsDouble(value);
if (val == -1.0 && PyErr_Occurred())
goto done;
/* '%' is 'f' applied to val*100, with a '%' appended further down. */
if (type == '%') {
type = 'f';
val *= 100;
add_pct = 1;
}
/* A negative precision means none was given in the specifier. */
if (precision < 0)
precision = default_precision;
/* Cast "type", because if we're in unicode we need to pass a
8-bit char. This is safe, because we've restricted what "type"
can be. */
buf = PyOS_double_to_string(val, (char)type, precision, flags,
&float_type);
if (buf == NULL)
goto done;
n_digits = strlen(buf);
if (add_pct) {
/* We know that buf has a trailing zero (since we just called
strlen() on it), and we don't use that fact any more. So we
can just write over the trailing zero. */
buf[n_digits] = '%';
n_digits += 1;
}
/* Since there is no unicode version of PyOS_double_to_string,
just use the 8 bit version and then convert to unicode. */
#if STRINGLIB_IS_UNICODE
unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE));
if (unicode_tmp == NULL) {
PyErr_NoMemory();
goto done;
}
strtounicode(unicode_tmp, buf, n_digits);
p = unicode_tmp;
#else
p = buf;
#endif
/* Is a sign character present in the output? If so, remember it
and skip it */
if (*p == '-') {
sign_char = *p;
++p;
--n_digits;
}
/* Determine if we have any "remainder" (after the digits, might include
decimal or exponent or both (or neither)) */
parse_number(p, n_digits, &n_remainder, &has_decimal);
/* Determine the grouping, separator, and decimal point, if any. */
/* This looks at format->type, not the local `type`, which has already
been rewritten from 'n' to 'g'. */
get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ?
LT_DEFAULT_LOCALE :
LT_NO_LOCALE),
&locale);
/* Calculate how much memory we'll need. */
n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits,
n_remainder, has_decimal, &locale, format);
/* Allocate the memory. */
result = STRINGLIB_NEW(NULL, n_total);
if (result == NULL)
goto done;
/* Populate the memory. */
fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
format->fill_char, &locale, 0);
done:
/* Both buffers are NULL-initialized, so every path into done is covered. */
PyMem_Free(buf);
#if STRINGLIB_IS_UNICODE
PyMem_Free(unicode_tmp);
#endif
return result;
}
#endif /* FORMAT_FLOAT */
/************************************************************************/
/*********** complex formatting *****************************************/
/************************************************************************/
#ifdef FORMAT_COMPLEX
/* Format the complex object `value` according to the already parsed
specifier `format`.
The real and imaginary parts are rendered separately by
PyOS_double_to_string(), laid out without any padding, and then placed
as one unit ("<re><im>j", optionally wrapped in parentheses) into a
field padded per the original width, alignment and fill character.
Alternate form ('#'), a '0' fill character and '=' alignment are
rejected with ValueError.
Returns a new string object, or NULL with an exception set. */
static PyObject *
format_complex_internal(PyObject *value,
const InternalFormatSpec *format)
{
double re;
double im;
char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
InternalFormatSpec tmp_format = *format;
Py_ssize_t n_re_digits;
Py_ssize_t n_im_digits;
Py_ssize_t n_re_remainder;
Py_ssize_t n_im_remainder;
Py_ssize_t n_re_total;
Py_ssize_t n_im_total;
int re_has_decimal;
int im_has_decimal;
Py_ssize_t precision;
Py_ssize_t default_precision = 6;
STRINGLIB_CHAR type = format->type;
STRINGLIB_CHAR *p_re;
STRINGLIB_CHAR *p_im;
NumberFieldWidths re_spec;
NumberFieldWidths im_spec;
int flags = 0;
PyObject *result = NULL;
STRINGLIB_CHAR *p;
STRINGLIB_CHAR re_sign_char = '\0';
STRINGLIB_CHAR im_sign_char = '\0';
int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
int im_float_type;
int add_parens = 0;
int skip_re = 0;
Py_ssize_t lpad;
Py_ssize_t rpad;
Py_ssize_t total;
#if STRINGLIB_IS_UNICODE
Py_UNICODE *re_unicode_tmp = NULL;
Py_UNICODE *im_unicode_tmp = NULL;
#endif
/* Locale settings, either from the actual locale or
from a hard-code pseudo-locale */
LocaleInfo locale;
if (format->precision > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "precision too big");
goto done;
}
/* The value fits in an int thanks to the check above. */
precision = (int)format->precision;
/* Alternate is not allowed on complex. */
if (format->alternate) {
PyErr_SetString(PyExc_ValueError,
"Alternate form (#) not allowed in complex format "
"specifier");
goto done;
}
/* Neither is zero padding. */
if (format->fill_char == '0') {
PyErr_SetString(PyExc_ValueError,
"Zero padding is not allowed in complex format "
"specifier");
goto done;
}
/* Neither is '=' alignment . */
if (format->align == '=') {
PyErr_SetString(PyExc_ValueError,
"'=' alignment flag is not allowed in complex format "
"specifier");
goto done;
}
re = PyComplex_RealAsDouble(value);
if (re == -1.0 && PyErr_Occurred())
goto done;
im = PyComplex_ImagAsDouble(value);
if (im == -1.0 && PyErr_Occurred())
goto done;
if (type == '\0') {
/* Omitted type specifier. Should be like str(self). */
type = 'g';
default_precision = PyFloat_STR_PRECISION;
/* A real part of positive zero is left out entirely; in every other
case the result is wrapped in parentheses.  copysign() tells +0.0
apart from -0.0. */
if (re == 0.0 && copysign(1.0, re) == 1.0)
skip_re = 1;
else
add_parens = 1;
}
if (type == 'n')
/* 'n' is the same as 'g', except for the locale used to
format the result. We take care of that later. */
type = 'g';
/* A negative precision means none was given in the specifier. */
if (precision < 0)
precision = default_precision;
/* Cast "type", because if we're in unicode we need to pass a
8-bit char. This is safe, because we've restricted what "type"
can be. */
re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
&re_float_type);
if (re_buf == NULL)
goto done;
im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
&im_float_type);
if (im_buf == NULL)
goto done;
n_re_digits = strlen(re_buf);
n_im_digits = strlen(im_buf);
/* Since there is no unicode version of PyOS_double_to_string,
just use the 8 bit version and then convert to unicode. */
#if STRINGLIB_IS_UNICODE
re_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_re_digits)*sizeof(Py_UNICODE));
if (re_unicode_tmp == NULL) {
PyErr_NoMemory();
goto done;
}
strtounicode(re_unicode_tmp, re_buf, n_re_digits);
p_re = re_unicode_tmp;
im_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_im_digits)*sizeof(Py_UNICODE));
if (im_unicode_tmp == NULL) {
PyErr_NoMemory();
goto done;
}
strtounicode(im_unicode_tmp, im_buf, n_im_digits);
p_im = im_unicode_tmp;
#else
p_re = re_buf;
p_im = im_buf;
#endif
/* Is a sign character present in the output? If so, remember it
and skip it */
if (*p_re == '-') {
re_sign_char = *p_re;
++p_re;
--n_re_digits;
}
if (*p_im == '-') {
im_sign_char = *p_im;
++p_im;
--n_im_digits;
}
/* Determine if we have any "remainder" (after the digits, might include
decimal or exponent or both (or neither)) */
parse_number(p_re, n_re_digits, &n_re_remainder, &re_has_decimal);
parse_number(p_im, n_im_digits, &n_im_remainder, &im_has_decimal);
/* Determine the grouping, separator, and decimal point, if any. */
/* This looks at format->type, not the local `type`, which has already
been rewritten from 'n' to 'g'. */
get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ?
LT_DEFAULT_LOCALE :
LT_NO_LOCALE),
&locale);
/* Turn off any padding. We'll do it later after we've composed
the numbers without padding. */
tmp_format.fill_char = '\0';
tmp_format.align = '<';
tmp_format.width = -1;
/* Calculate how much memory we'll need. */
n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, p_re,
n_re_digits, n_re_remainder,
re_has_decimal, &locale, &tmp_format);
/* Same formatting, but always include a sign, unless the real part is
* going to be omitted, in which case we use whatever sign convention was
* requested by the original format. */
if (!skip_re)
tmp_format.sign = '+';
n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, p_im,
n_im_digits, n_im_remainder,
im_has_decimal, &locale, &tmp_format);
/* An omitted real part takes up no room in the output. */
if (skip_re)
n_re_total = 0;
/* Add 1 for the 'j', and optionally 2 for parens. */
calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
format->width, format->align, &lpad, &rpad, &total);
result = STRINGLIB_NEW(NULL, total);
if (result == NULL)
goto done;
/* Populate the memory. First, the padding. */
p = fill_padding(STRINGLIB_STR(result),
n_re_total + n_im_total + 1 + add_parens * 2,
format->fill_char, lpad, rpad);
/* Then the content: '(' real imag 'j' ')'. */
if (add_parens)
*p++ = '(';
if (!skip_re) {
fill_number(p, &re_spec, p_re, n_re_digits, NULL, 0, &locale, 0);
p += n_re_total;
}
fill_number(p, &im_spec, p_im, n_im_digits, NULL, 0, &locale, 0);
p += n_im_total;
*p++ = 'j';
if (add_parens)
*p++ = ')';
done:
/* All four buffers are NULL-initialized, so every path into done is
covered. */
PyMem_Free(re_buf);
PyMem_Free(im_buf);
#if STRINGLIB_IS_UNICODE
PyMem_Free(re_unicode_tmp);
PyMem_Free(im_unicode_tmp);
#endif
return result;
}
#endif /* FORMAT_COMPLEX */
/************************************************************************/
/*********** built in formatters ****************************************/
/************************************************************************/
/* Built-in formatter for string objects.

   obj             - the string object to format
   format_spec     - start of the format specifier
   format_spec_len - length of the format specifier, in characters

   An empty specifier yields STRINGLIB_TOSTR(obj).  Otherwise the
   specifier is parsed with defaults type 's' and alignment '<'; only
   presentation type 's' is accepted.

   Returns a new string object, or NULL on failure. */
PyObject *
FORMAT_STRING(PyObject *obj,
              STRINGLIB_CHAR *format_spec,
              Py_ssize_t format_spec_len)
{
    InternalFormatSpec format;

    /* Zero-length spec: equivalent to str(obj). */
    if (format_spec_len == 0) {
        return STRINGLIB_TOSTR(obj);
    }

    if (!parse_internal_render_format_spec(format_spec, format_spec_len,
                                           &format, 's', '<')) {
        return NULL;
    }

    if (format.type == 's') {
        /* Already a string, so no conversion is needed. */
        return format_string_internal(obj, &format);
    }

    /* Any other presentation type is unknown for strings. */
    unknown_presentation_type(format.type, obj->ob_type->tp_name);
    return NULL;
}
#if defined FORMAT_LONG || defined FORMAT_INT
/* Shared implementation behind the int and long formatters.

   obj             - the int or long object to format
   format_spec     - start of the format specifier
   format_spec_len - length of the format specifier, in characters
   tostring        - callback that renders obj in a given base

   An empty specifier yields STRINGLIB_TOSTR(obj).  Otherwise the
   specifier is parsed with defaults type 'd' and alignment '>'.
   Integer presentation types are formatted directly; float presentation
   types convert obj with PyNumber_Float() first and format the result
   as a float.

   Returns a new string object, or NULL on failure. */
static PyObject*
format_int_or_long(PyObject* obj,
                   STRINGLIB_CHAR *format_spec,
                   Py_ssize_t format_spec_len,
                   IntOrLongToString tostring)
{
    InternalFormatSpec format;
    PyObject *as_float;
    PyObject *formatted;

    /* Zero-length spec: equivalent to str(obj). */
    if (format_spec_len == 0) {
        return STRINGLIB_TOSTR(obj);
    }

    if (!parse_internal_render_format_spec(format_spec,
                                           format_spec_len,
                                           &format, 'd', '>')) {
        return NULL;
    }

    switch (format.type) {
    case 'b': case 'c': case 'd': case 'o':
    case 'x': case 'X': case 'n':
        /* Integer presentation: format the object as it is. */
        return format_int_or_long_internal(obj, &format, tostring);

    case 'e': case 'E': case 'f': case 'F':
    case 'g': case 'G': case '%':
        /* Float presentation: go through a temporary float object. */
        as_float = PyNumber_Float(obj);
        if (as_float == NULL) {
            return NULL;
        }
        formatted = format_float_internal(as_float, &format);
        Py_DECREF(as_float);
        return formatted;

    default:
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
        return NULL;
    }
}
#endif /* FORMAT_LONG || defined FORMAT_INT */
#ifdef FORMAT_LONG
/* Need to define long_format as a function that will convert a long
to a string. In 3.0, _PyLong_Format has the correct signature. In
2.x, we need to fudge a few parameters */
#if PY_VERSION_HEX >= 0x03000000
#define long_format _PyLong_Format
#else
/* IntOrLongToString adapter for long objects: render `value` in `base`
   through _PyLong_Format(), without a trailing 'L' and using the new
   octal format.  `value` must already be a long object. */
static PyObject*
long_format(PyObject* value, int base)
{
    const int add_L_suffix = 0;     /* don't add 'L' */
    const int new_style_octal = 1;  /* use the new octal format */

    assert(PyLong_Check(value));
    return _PyLong_Format(value, base, add_L_suffix, new_style_octal);
}
#endif
/* Built-in formatter for long objects: format_int_or_long() with
   long_format as the base-conversion routine.
   Returns a new string object, or NULL on failure. */
PyObject *
FORMAT_LONG(PyObject *obj,
            STRINGLIB_CHAR *format_spec,
            Py_ssize_t format_spec_len)
{
    PyObject *formatted;

    formatted = format_int_or_long(obj, format_spec, format_spec_len,
                                   long_format);
    return formatted;
}
#endif /* FORMAT_LONG */
#ifdef FORMAT_INT
/* this is only used for 2.x, not 3.0 */
/* IntOrLongToString adapter for int objects: render `value` in `base`
   through _PyInt_Format(), using the new octal format.  `value` must
   already be an int object. */
static PyObject*
int_format(PyObject* value, int base)
{
    const int new_style_octal = 1;  /* use the new octal format */

    assert(PyInt_Check(value));
    return _PyInt_Format((PyIntObject*)value, base, new_style_octal);
}
/* Built-in formatter for int objects: format_int_or_long() with
   int_format as the base-conversion routine.
   Returns a new string object, or NULL on failure. */
PyObject *
FORMAT_INT(PyObject *obj,
           STRINGLIB_CHAR *format_spec,
           Py_ssize_t format_spec_len)
{
    PyObject *formatted;

    formatted = format_int_or_long(obj, format_spec, format_spec_len,
                                   int_format);
    return formatted;
}
#endif /* FORMAT_INT */
#ifdef FORMAT_FLOAT
PyObject *
FORMAT_FLOAT(PyObject *obj,
             STRINGLIB_CHAR *format_spec,
             Py_ssize_t format_spec_len)
{
    InternalFormatSpec format;

    /* A zero length format spec is special-cased to be equivalent to
       str(obj). */
    if (format_spec_len == 0)
        return STRINGLIB_TOSTR(obj);

    /* Parse the format_spec; '\0' is passed as the default type and '>'
       as the default alignment.  Return NULL if parsing fails. */
    if (!parse_internal_render_format_spec(format_spec, format_spec_len,
                                           &format, '\0', '>'))
        return NULL;

    /* Reject presentation types a float does not support. */
    switch (format.type) {
    case '\0': /* No format code: like 'g', but with at least one decimal. */
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
    case 'n':
    case '%':
        break;
    default:
        /* unknown */
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
        return NULL;
    }

    /* No conversion needed, obj is already a float: do the formatting. */
    return format_float_internal(obj, &format);
}
#endif /* FORMAT_FLOAT */
#ifdef FORMAT_COMPLEX
PyObject *
FORMAT_COMPLEX(PyObject *obj,
               STRINGLIB_CHAR *format_spec,
               Py_ssize_t format_spec_len)
{
    InternalFormatSpec format;

    /* A zero length format spec is special-cased to be equivalent to
       str(obj). */
    if (format_spec_len == 0)
        return STRINGLIB_TOSTR(obj);

    /* Parse the format_spec; '\0' is passed as the default type and '>'
       as the default alignment.  Return NULL if parsing fails. */
    if (!parse_internal_render_format_spec(format_spec, format_spec_len,
                                           &format, '\0', '>'))
        return NULL;

    /* Reject presentation types a complex does not support. */
    switch (format.type) {
    case '\0': /* No format code: like 'g', but with at least one decimal. */
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
    case 'n':
        break;
    default:
        /* unknown */
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
        return NULL;
    }

    /* No conversion needed, obj is already a complex: do the formatting. */
    return format_complex_internal(obj, &format);
}
#endif /* FORMAT_COMPLEX */
/* stringlib: locale related helpers implementation */
#ifndef STRINGLIB_LOCALEUTIL_H
#define STRINGLIB_LOCALEUTIL_H
#include <locale.h>
/* Two-argument maximum and minimum.  Each macro mentions its arguments
more than once, so an argument expression may be evaluated twice; do not
pass expressions with side effects. */
#define MAX(x, y) ((x) < (y) ? (y) : (x))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
/* Iteration state used by _GroupGenerator_next() to walk a grouping
string one entry at a time. */
typedef struct {
const char *grouping; /* The grouping string being walked. */
char previous; /* Last entry handed out; returned again once the
terminating 0 byte of grouping is reached. */
Py_ssize_t i; /* Where we're currently pointing in grouping. */
} GroupGenerator;
static void
_GroupGenerator_init(GroupGenerator *self, const char *grouping)
{
    /* Begin at the first entry of the grouping string, with no previous
       group width recorded yet. */
    self->previous = 0;
    self->i = 0;
    self->grouping = grouping;
}
/* Returns the next group width, or 0 to signal the end. */
static Py_ssize_t
_GroupGenerator_next(GroupGenerator *self)
{
    /* Very little error checking is done here.  A grouping string made
       of just CHAR_MAX, for instance, simply terminates the generator.
       That shouldn't happen, but at least we fail gracefully. */
    const char width = self->grouping[self->i];

    if (width == 0) {
        /* End of the grouping string: repeat the last width handed out. */
        return self->previous;
    }
    if (width == CHAR_MAX) {
        /* Stop the generator. */
        return 0;
    }

    /* Ordinary entry: remember it and advance to the next one. */
    self->previous = width;
    self->i++;
    return (Py_ssize_t)width;
}
/* Write one group into the output, working backwards from *buffer_end:
   an optional thousands separator, then n_chars digits taken from just
   before *digits_end, then n_zeros '0' characters.  Both end pointers are
   moved back past what was consumed/written.  Every part is optional,
   depending on when we're called. */
static void
fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
     Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep,
     Py_ssize_t thousands_sep_len)
{
#if STRINGLIB_IS_UNICODE
    Py_ssize_t k;
#endif
    STRINGLIB_CHAR *out = *buffer_end;
    STRINGLIB_CHAR *in = *digits_end;

    if (thousands_sep != NULL) {
        out -= thousands_sep_len;

        /* Copy the thousands_sep chars into the buffer. */
#if STRINGLIB_IS_UNICODE
        /* The locale's separator is made of chars; widen each one into
           the unicode buffer. */
        for (k = 0; k < thousands_sep_len; ++k)
            out[k] = thousands_sep[k];
#else
        /* No conversion, just memcpy the thousands_sep. */
        memcpy(out, thousands_sep, thousands_sep_len);
#endif
    }

    /* The digits of this group. */
    out -= n_chars;
    in -= n_chars;
    memcpy(out, in, n_chars * sizeof(STRINGLIB_CHAR));

    /* Leading zero padding. */
    out -= n_zeros;
    STRINGLIB_FILL(out, '0', n_zeros);

    *buffer_end = out;
    *digits_end = in;
}
/**
* _Py_InsertThousandsGrouping:
* @buffer: A pointer to the start of the output string, or NULL to only
* count the characters that would be written.
* @n_buffer: Number of characters in @buffer.
* @digits: A pointer to the digits we're reading from. Only read when
* @buffer is non-NULL.
* @n_digits: The number of digits in the string, in which we want
* to put the grouping chars.
* @min_width: The minimum width of the digits in the output string.
* Output will be zero-padded on the left to fill.
* @grouping: see definition in localeconv().
* @thousands_sep: see definition in localeconv().
*
* There are 2 modes: counting and filling. If @buffer is NULL,
* we are in counting mode, else filling mode.
* If counting, the required buffer size is returned.
* If filling, we know the buffer will be large enough; groups are written
* backwards starting from @buffer + @n_buffer, reading digits backwards
* starting from @digits + @n_digits.
*
* Return value: the total number of characters (digits, zero padding and
* separators) accounted for. The same count is computed in both modes.
*
* This name won't be used, the includer of this file should define
* it to be the actual function name, based on unicode or string.
*
* As closely as possible, this code mimics the logic in decimal.py's
* _insert_thousands_sep().
**/
Py_ssize_t
_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep)
{
Py_ssize_t count = 0;
Py_ssize_t n_zeros;
int loop_broken = 0;
int use_separator = 0; /* First time through, don't append the
separator. They only go between
groups. */
STRINGLIB_CHAR *buffer_end = NULL;
STRINGLIB_CHAR *digits_end = NULL;
Py_ssize_t l;
Py_ssize_t n_chars;
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
be looked at */
/* A generator that returns all of the grouping widths, until it
returns 0. */
GroupGenerator groupgen;
_GroupGenerator_init(&groupgen, grouping);
/* Filling mode only: both cursors start one past the end and are moved
backwards by fill(). */
if (buffer) {
buffer_end = buffer + n_buffer;
digits_end = digits + n_digits;
}
while ((l = _GroupGenerator_next(&groupgen)) > 0) {
/* Clamp the group width to what is still needed (at least 1). */
l = MIN(l, MAX(MAX(remaining, min_width), 1));
n_zeros = MAX(0, l - remaining);
n_chars = MAX(0, MIN(remaining, l));
/* Use n_zeros zeros and n_chars chars */
/* Always count; the copy below happens only in filling mode. */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
fill(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
/* Use a separator next time. */
use_separator = 1;
remaining -= n_chars;
min_width -= l;
/* Done once every digit is placed and the minimum width is met. */
if (remaining <= 0 && min_width <= 0) {
loop_broken = 1;
break;
}
/* The separator emitted with the next group also counts towards the
minimum width. */
min_width -= thousands_sep_len;
}
if (!loop_broken) {
/* We left the loop without using a break statement, i.e. the
generator returned 0: put everything that is left into one final
group. */
l = MAX(MAX(remaining, min_width), 1);
n_zeros = MAX(0, l - remaining);
n_chars = MAX(0, MIN(remaining, l));
/* Use n_zeros zeros and n_chars chars */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
fill(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
}
return count;
}
/**
 * _Py_InsertThousandsGroupingLocale:
 * @buffer: A pointer to the start of a string.
 * @n_digits: The number of digits in the string, in which we want
 *            to put the grouping chars.
 *
 * Reads the current locale and calls _Py_InsertThousandsGrouping() with
 * that locale's grouping and thousands_sep; the remaining arguments are
 * passed through unchanged.
 **/
Py_ssize_t
_Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer,
                                  Py_ssize_t n_buffer,
                                  STRINGLIB_CHAR *digits,
                                  Py_ssize_t n_digits,
                                  Py_ssize_t min_width)
{
    struct lconv *lc = localeconv();

    return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits,
                                       min_width, lc->grouping,
                                       lc->thousands_sep);
}
#endif /* STRINGLIB_LOCALEUTIL_H */
/* stringlib: partition implementation */
#ifndef STRINGLIB_PARTITION_H
#define STRINGLIB_PARTITION_H
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
/* Split str around the first occurrence of sep.

   Returns a new 3-tuple (head, sep, tail).  When sep does not occur the
   tuple is (str, empty, empty).  Returns NULL with ValueError set when
   sep is empty, and NULL when the tuple or any of its items could not
   be created. */
Py_LOCAL_INLINE(PyObject*)
stringlib_partition(PyObject* str_obj,
                    const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                    PyObject* sep_obj,
                    const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{
    PyObject* out;
    Py_ssize_t pos;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }

    out = PyTuple_New(3);
    if (!out)
        return NULL;

    pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);

    if (pos < 0) {
#if STRINGLIB_MUTABLE
        /* Mutable types must not share objects, so build fresh copies. */
        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));

        /* Any of the allocations above may have failed and stored NULL
           in the tuple; never hand such a tuple back to the caller. */
        if (PyErr_Occurred()) {
            Py_DECREF(out);
            return NULL;
        }
#else
        /* Immutable: reuse the input object and the shared empty string. */
        Py_INCREF(str_obj);
        PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
        Py_INCREF(STRINGLIB_EMPTY);
        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
        Py_INCREF(STRINGLIB_EMPTY);
        PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
#endif
        return out;
    }

    /* Found: head is str[:pos], tail starts after the separator. */
    PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
    Py_INCREF(sep_obj);
    PyTuple_SET_ITEM(out, 1, sep_obj);
    pos += sep_len;
    PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));

    /* A failed STRINGLIB_NEW leaves a NULL item and an exception set. */
    if (PyErr_Occurred()) {
        Py_DECREF(out);
        return NULL;
    }

    return out;
}
/* Split str around the last occurrence of sep.

   Returns a new 3-tuple (head, sep, tail).  When sep does not occur the
   tuple is (empty, empty, str).  Returns NULL with ValueError set when
   sep is empty, and NULL when the tuple or any of its items could not
   be created. */
Py_LOCAL_INLINE(PyObject*)
stringlib_rpartition(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     PyObject* sep_obj,
                     const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{
    PyObject* out;
    Py_ssize_t pos;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }

    out = PyTuple_New(3);
    if (!out)
        return NULL;

    pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);

    if (pos < 0) {
#if STRINGLIB_MUTABLE
        /* Mutable types must not share objects, so build fresh copies. */
        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));

        /* Any of the allocations above may have failed and stored NULL
           in the tuple; never hand such a tuple back to the caller. */
        if (PyErr_Occurred()) {
            Py_DECREF(out);
            return NULL;
        }
#else
        /* Immutable: reuse the shared empty string and the input object. */
        Py_INCREF(STRINGLIB_EMPTY);
        PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
        Py_INCREF(STRINGLIB_EMPTY);
        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
        Py_INCREF(str_obj);
        PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
#endif
        return out;
    }

    /* Found: head is str[:pos], tail starts after the separator. */
    PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
    Py_INCREF(sep_obj);
    PyTuple_SET_ITEM(out, 1, sep_obj);
    pos += sep_len;
    PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));

    /* A failed STRINGLIB_NEW leaves a NULL item and an exception set. */
    if (PyErr_Occurred()) {
        Py_DECREF(out);
        return NULL;
    }

    return out;
}
#endif
/* stringlib: split implementation */
#ifndef STRINGLIB_SPLIT_H
#define STRINGLIB_SPLIT_H
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
/* Overallocate the initial list to reduce the number of reallocs for small
split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
resizes, to sizes 4, 8, then 16. Most observed string splits are for human
text (roughly 11 words per line) and field delimited data (usually 1-10
fields). For large strings the split algorithms are bandwidth limited
so increasing the preallocation likely will not improve things.*/
#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split with the given limit:
   maxsplit splits give maxsplit+1 elements (5 splits gives 6 elements),
   capped at MAX_PREALLOC.  The argument is parenthesized so that any
   expression can be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Append data[left:right] to `list` as a new string.

   Relies on the enclosing function providing the locals `list` and `sub`
   and an `onError` label, which is jumped to on failure.  Wrapped in
   do/while(0) so it behaves as a single statement. */
#define SPLIT_APPEND(data, left, right)                 \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (PyList_Append(list, sub)) {                 \
            Py_DECREF(sub);                             \
            goto onError;                               \
        }                                               \
        Py_DECREF(sub);                                 \
    } while (0)

/* Add data[left:right] to `list` as element number `count`, then bump
   `count`.  The first MAX_PREALLOC elements are stored directly into the
   preallocated slots; later ones are appended.

   Relies on the enclosing function providing the locals `list`, `sub`
   and `count` and an `onError` label, which is jumped to on failure. */
#define SPLIT_ADD(data, left, right)                    \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (count < MAX_PREALLOC) {                     \
            PyList_SET_ITEM(list, count, sub);          \
        } else {                                        \
            if (PyList_Append(list, sub)) {             \
                Py_DECREF(sub);                         \
                goto onError;                           \
            }                                           \
            Py_DECREF(sub);                             \
        }                                               \
        count++;                                        \
    } while (0)

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
/* Split str on runs of whitespace (as defined by STRINGLIB_ISSPACE),
scanning left to right and cutting out at most maxcount words; whatever
follows is added as one final element with leading whitespace skipped.
Returns a new list, or NULL on failure.
The SPLIT_ADD macro uses the locals list, sub and count and jumps to
onError when it fails. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_whitespace(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
Py_ssize_t maxcount)
{
Py_ssize_t i, j, count=0;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
PyObject *sub;
if (list == NULL)
return NULL;
i = j = 0;
while (maxcount-- > 0) {
/* Skip leading whitespace; stop if nothing but whitespace is left. */
while (i < str_len && STRINGLIB_ISSPACE(str[i]))
i++;
if (i == str_len) break;
/* j marks the start of the word, i ends up one past its end. */
j = i; i++;
while (i < str_len && !STRINGLIB_ISSPACE(str[i]))
i++;
#ifndef STRINGLIB_MUTABLE
if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
/* No whitespace in str_obj, so just use it as list[0] */
Py_INCREF(str_obj);
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
count++;
break;
}
#endif
SPLIT_ADD(str, j, i);
}
if (i < str_len) {
/* Only occurs when maxcount was reached */
/* Skip any remaining whitespace and copy to end of string */
while (i < str_len && STRINGLIB_ISSPACE(str[i]))
i++;
if (i != str_len)
SPLIT_ADD(str, i, str_len);
}
/* Set the list size to the number of elements actually stored. */
FIX_PREALLOC_SIZE(list);
return list;
onError:
Py_DECREF(list);
return NULL;
}
/* Split str at occurrences of the single character ch, scanning left to
right and making at most maxcount splits; the remainder of the string is
added as the last element.
Returns a new list, or NULL on failure.
The SPLIT_ADD macro uses the locals list, sub and count and jumps to
onError when it fails. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_char(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR ch,
Py_ssize_t maxcount)
{
Py_ssize_t i, j, count=0;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
PyObject *sub;
if (list == NULL)
return NULL;
/* i is the start of the current piece, j the scan position. */
i = j = 0;
while ((j < str_len) && (maxcount-- > 0)) {
for(; j < str_len; j++) {
/* I found that using memchr makes no difference */
if (str[j] == ch) {
SPLIT_ADD(str, i, j);
/* Resume just after the separator. */
i = j = j + 1;
break;
}
}
}
#ifndef STRINGLIB_MUTABLE
if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
/* ch not in str_obj, so just use str_obj as list[0] */
Py_INCREF(str_obj);
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
count++;
} else
#endif
if (i <= str_len) {
/* Add everything after the last split (possibly empty). */
SPLIT_ADD(str, i, str_len);
}
/* Set the list size to the number of elements actually stored. */
FIX_PREALLOC_SIZE(list);
return list;
onError:
Py_DECREF(list);
return NULL;
}
/* Split str at occurrences of sep, scanning left to right and making at
   most maxcount splits; the remainder of the string becomes the last
   element.  An empty separator raises ValueError, and a one-character
   separator is handed to stringlib_split_char().

   Returns a new list, or NULL on failure.  SPLIT_ADD uses the locals
   list, sub and count and jumps to onError when it fails. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split(PyObject* str_obj,
                const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                Py_ssize_t maxcount)
{
    Py_ssize_t i = 0, pos, count = 0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1)
        return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (!list)
        return NULL;

    /* i is the start of the piece currently being delimited. */
    for (; maxcount > 0; maxcount--) {
        pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
        if (pos < 0)
            break;
        SPLIT_ADD(str, i, i + pos);
        /* Continue right after the separator that was just found. */
        i += pos + sep_len;
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No match in str_obj, so just use it as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    {
        /* Whatever follows the last split is the final element. */
        SPLIT_ADD(str, i, str_len);
    }

    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str on runs of whitespace (as defined by STRINGLIB_ISSPACE),
scanning right to left and cutting out at most maxcount words; whatever
precedes them is added as one more element with trailing whitespace
skipped. Elements are collected in reverse and the list is reversed
before it is returned.
Returns a new list, or NULL on failure.
The SPLIT_ADD macro uses the locals list, sub and count and jumps to
onError when it fails. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_whitespace(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
Py_ssize_t maxcount)
{
Py_ssize_t i, j, count=0;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
PyObject *sub;
if (list == NULL)
return NULL;
i = j = str_len - 1;
while (maxcount-- > 0) {
/* Skip trailing whitespace; stop if nothing but whitespace is left. */
while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
i--;
if (i < 0) break;
/* j marks the last character of the word, i ends up one before its
first character. */
j = i; i--;
while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))
i--;
#ifndef STRINGLIB_MUTABLE
if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
/* No whitespace in str_obj, so just use it as list[0] */
Py_INCREF(str_obj);
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
count++;
break;
}
#endif
SPLIT_ADD(str, i + 1, j + 1);
}
if (i >= 0) {
/* Only occurs when maxcount was reached */
/* Skip any remaining whitespace and copy to beginning of string */
while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
i--;
if (i >= 0)
SPLIT_ADD(str, 0, i + 1);
}
/* Set the list size to the number of elements actually stored, then
put them back into left-to-right order. */
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
}
/* Split str at occurrences of the single character ch, scanning right to
left and making at most maxcount splits; the beginning of the string is
added as one more element. Elements are collected in reverse and the
list is reversed before it is returned.
Returns a new list, or NULL on failure.
The SPLIT_ADD macro uses the locals list, sub and count and jumps to
onError when it fails. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_char(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR ch,
Py_ssize_t maxcount)
{
Py_ssize_t i, j, count=0;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
PyObject *sub;
if (list == NULL)
return NULL;
/* j is the last character of the current piece, i the scan position. */
i = j = str_len - 1;
while ((i >= 0) && (maxcount-- > 0)) {
for(; i >= 0; i--) {
if (str[i] == ch) {
SPLIT_ADD(str, i + 1, j + 1);
/* Resume just before the separator. */
j = i = i - 1;
break;
}
}
}
#ifndef STRINGLIB_MUTABLE
if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
/* ch not in str_obj, so just use str_obj as list[0] */
Py_INCREF(str_obj);
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
count++;
} else
#endif
if (j >= -1) {
/* Add everything before the last split (possibly empty). */
SPLIT_ADD(str, 0, j + 1);
}
/* Set the list size to the number of elements actually stored, then
put them back into left-to-right order. */
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
}
/* Split str at occurrences of sep, scanning right to left and making at
   most maxcount splits; the beginning of the string becomes one more
   element.  Elements are collected in reverse and the list is reversed
   before it is returned.  An empty separator raises ValueError, and a
   one-character separator is handed to stringlib_rsplit_char().

   Returns a new list, or NULL on failure.  SPLIT_ADD uses the locals
   list, sub and count and jumps to onError when it fails. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit(PyObject* str_obj,
                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                 Py_ssize_t maxcount)
{
    Py_ssize_t j = str_len, pos, count = 0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1)
        return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (!list)
        return NULL;

    /* j is one past the end of the piece currently being delimited. */
    for (; maxcount > 0; maxcount--) {
        pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH);
        if (pos < 0)
            break;
        SPLIT_ADD(str, pos + sep_len, j);
        /* Continue with the text in front of the separator just found. */
        j = pos;
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No match in str_obj, so just use it as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    {
        /* Whatever precedes the last split is the final element. */
        SPLIT_ADD(str, 0, j);
    }

    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str into lines at the characters STRINGLIB_ISLINEBREAK accepts,
treating "\r\n" as a single line break. Line breaks are kept in the
elements only when keepends is non-zero.
Returns a new list, or NULL on failure.
The SPLIT_APPEND macro uses the locals list and sub and jumps to onError
when it fails. */
Py_LOCAL_INLINE(PyObject *)
stringlib_splitlines(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
int keepends)
{
/* This does not use the preallocated list because splitlines is
usually run with hundreds of newlines. The overhead of
switching between PyList_SET_ITEM and append causes about a
2-3% slowdown for that common case. A smarter implementation
could move the if check out, so the SET_ITEMs are done first
and the appends only done when the prealloc buffer is full.
That's too much work for little gain.*/
register Py_ssize_t i;
register Py_ssize_t j;
PyObject *list = PyList_New(0);
PyObject *sub;
if (list == NULL)
return NULL;
/* j is the start of the current line, i the scan position. */
for (i = j = 0; i < str_len; ) {
Py_ssize_t eol;
/* Find a line and append it */
while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))
i++;
/* Skip the line break reading CRLF as one line break */
eol = i;
if (i < str_len) {
if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')
i += 2;
else
i++;
/* With keepends the element extends past the line break. */
if (keepends)
eol = i;
}
#ifndef STRINGLIB_MUTABLE
if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
/* No linebreak in str_obj, so just use it as list[0] */
if (PyList_Append(list, str_obj))
goto onError;
break;
}
#endif
SPLIT_APPEND(str, j, eol);
j = i;
}
return list;
onError:
Py_DECREF(list);
return NULL;
}
#endif
// This file is originally from CPython 2.7, with modifications for Pyston
/*
string_format.h -- implementation of string.format().
It uses the Objects/stringlib conventions, so that it can be
compiled for both unicode and string objects.
*/
/* Defines for Python 2.6 compatibility */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
/* Defines for more efficiently reallocating the string buffer */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
/************************************************************************/
/*********** Global data structures and forward declarations *********/
/************************************************************************/
/*
A SubString consists of the characters between two string or
unicode pointers: the half-open range [ptr, end). A NULL ptr (with a
NULL end) is used to represent "no string at all"; see SubString_init.
*/
typedef struct {
STRINGLIB_CHAR *ptr; /* first character, or NULL */
STRINGLIB_CHAR *end; /* one past the last character */
} SubString;
/* How the numeric fields of a format string are being numbered. */
typedef enum {
ANS_INIT, /* initial state set by AutoNumber_Init: not decided yet */
ANS_AUTO, /* chosen when the first numeric field has an empty name */
ANS_MANUAL /* chosen when the first numeric field has an explicit index */
} AutoNumberState; /* Keep track if we're auto-numbering fields */
/* Keeps track of our auto-numbering state, and which number field we're on */
typedef struct {
AutoNumberState an_state; /* numbering mode in effect */
int an_field_number; /* index handed to the next field with an empty name */
} AutoNumber;
/* forward declaration for recursion */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number);
/************************************************************************/
/************************** Utility functions ************************/
/************************************************************************/
static void
AutoNumber_Init(AutoNumber *auto_number)
{
    /* Numbering starts at field 0 with the mode still undecided. */
    auto_number->an_field_number = 0;
    auto_number->an_state = ANS_INIT;
}
/* Fill in a SubString from a pointer and a length.  A NULL pointer
   produces a SubString whose ptr and end are both NULL. */
Py_LOCAL_INLINE(void)
SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
{
    str->ptr = p;
    str->end = (p != NULL) ? p + len : NULL;
}
/* Return a new string object holding the SubString's characters.  If
   str->ptr is NULL, return a new reference to None instead. */
Py_LOCAL_INLINE(PyObject *)
SubString_new_object(SubString *str)
{
    if (str->ptr != NULL) {
        return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
    }

    Py_INCREF(Py_None);
    return Py_None;
}
/* Return a new string object holding the SubString's characters.  If
   str->ptr is NULL, return a new empty string (not None). */
Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString *str)
{
    if (str->ptr != NULL) {
        return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
    }

    return STRINGLIB_NEW(NULL, 0);
}
/* Check that the current field agrees with the numbering mode in effect.

   Returns 1, with ValueError set, when a field with an empty name shows
   up in manual mode or a field with a non-empty name shows up in any
   other mode; returns 0 otherwise. */
static int
autonumber_state_error(AutoNumberState state, int field_name_is_empty)
{
    const int manual = (state == ANS_MANUAL);

    if (manual && field_name_is_empty) {
        PyErr_SetString(PyExc_ValueError, "cannot switch from "
                        "manual field specification to "
                        "automatic field numbering");
        return 1;
    }

    if (!manual && !field_name_is_empty) {
        PyErr_SetString(PyExc_ValueError, "cannot switch from "
                        "automatic field numbering to "
                        "manual field specification");
        return 1;
    }

    return 0;
}
/************************************************************************/
/*********** Output string management functions ****************/
/************************************************************************/
/* Growable output buffer backed by a string object. */
typedef struct {
STRINGLIB_CHAR *ptr; /* where output_data() writes next */
STRINGLIB_CHAR *end; /* one past the last character of obj's buffer */
PyObject *obj; /* the string object that owns the buffer */
Py_ssize_t size_increment; /* extra room output_extend() adds on top of
what is requested */
} OutputString;
/* Initialize an OutputString object, reserving size characters.
   Returns 1 on success and 0 if the backing string could not be
   created. */
static int
output_initialize(OutputString *output, Py_ssize_t size)
{
    PyObject *backing = STRINGLIB_NEW(NULL, size);

    output->obj = backing;
    if (backing == NULL)
        return 0;

    /* Writing starts at the beginning of the freshly created buffer. */
    output->ptr = STRINGLIB_STR(backing);
    output->end = output->ptr + STRINGLIB_LEN(backing);
    output->size_increment = INITIAL_SIZE_INCREMENT;
    return 1;
}
/*
    output_extend reallocates the output string buffer so that it holds
    what has been written so far, plus count more characters, plus the
    current size increment.
    It returns a status:  0 for a failed reallocation,
    1 for success.
*/
static int
output_extend(OutputString *output, Py_ssize_t count)
{
    Py_ssize_t used = output->ptr - STRINGLIB_STR(output->obj);
    Py_ssize_t new_len = used + count + output->size_increment;
    STRINGLIB_CHAR *base;

    if (STRINGLIB_RESIZE(&output->obj, new_len) < 0)
        return 0;

    /* Re-derive both cursors from the resized object. */
    base = STRINGLIB_STR(output->obj);
    output->ptr = base + used;
    output->end = base + new_len;

    /* Grow the increment for next time, up to a ceiling. */
    if (output->size_increment < MAX_SIZE_INCREMENT)
        output->size_increment *= SIZE_MULTIPLIER;
    return 1;
}
/*
    output_data appends count characters from s to the output string
    buffer, reallocating the string first when they do not fit.
    It returns a status:  0 for a failed reallocation,
    1 for success.
*/
static int
output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
{
    Py_ssize_t room = output->end - output->ptr;

    if (count > room) {
        if (!output_extend(output, count))
            return 0;
    }

    memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
    output->ptr += count;
    return 1;
}
/************************************************************************/
/*********** Format string parsing -- integers and identifiers *********/
/************************************************************************/
/* Parse the whole SubString as a non-negative decimal integer.

   Returns the value, or -1 when the SubString is empty or contains a
   character that is not a decimal digit (no exception is set in those
   cases).  Also returns -1, with ValueError set, when the value would
   exceed PY_SSIZE_T_MAX. */
static Py_ssize_t
get_integer(const SubString *str)
{
    Py_ssize_t value = 0;
    STRINGLIB_CHAR *cur = str->ptr;

    /* empty string is an error */
    if (cur >= str->end)
        return -1;

    while (cur < str->end) {
        Py_ssize_t digit = STRINGLIB_TODECIMAL(*cur);

        if (digit < 0)
            return -1;

        /* Catch overflow before it happens:
           value * 10 + digit > PY_SSIZE_T_MAX if and only if
           value > (PY_SSIZE_T_MAX - digit) / 10. */
        if (value > (PY_SSIZE_T_MAX - digit) / 10) {
            PyErr_Format(PyExc_ValueError,
                         "Too many decimal digits in format string");
            return -1;
        }

        value = value * 10 + digit;
        cur++;
    }
    return value;
}
/************************************************************************/
/******** Functions to get field objects and specification strings ******/
/************************************************************************/
/* Do the equivalent of obj.name.  Returns the result of
   PyObject_GetAttr(), or NULL if the name object cannot be created. */
static PyObject *
getattr(PyObject *obj, SubString *name)
{
    PyObject *attr;
    PyObject *name_obj = SubString_new_object(name);

    if (!name_obj)
        return NULL;

    attr = PyObject_GetAttr(obj, name_obj);
    /* The temporary name object is no longer needed. */
    Py_DECREF(name_obj);
    return attr;
}
/* Do the equivalent of obj[idx], where obj is a sequence. */
static PyObject *
getitem_sequence(PyObject *obj, Py_ssize_t idx)
{
    PyObject *item = PySequence_GetItem(obj, idx);

    return item;
}
/* Do the equivalent of obj[idx], where obj is not a sequence: the index
   is turned into an integer object and used as the key.  Returns the
   result of PyObject_GetItem(), or NULL if the key cannot be created. */
static PyObject *
getitem_idx(PyObject *obj, Py_ssize_t idx)
{
    PyObject *item;
    PyObject *key = PyLong_FromSsize_t(idx);

    if (!key)
        return NULL;

    item = PyObject_GetItem(obj, key);
    /* The temporary key object is no longer needed. */
    Py_DECREF(key);
    return item;
}
/* Do the equivalent of obj[name].  Returns the result of
   PyObject_GetItem(), or NULL if the key object cannot be created. */
static PyObject *
getitem_str(PyObject *obj, SubString *name)
{
    PyObject *item;
    PyObject *key = SubString_new_object(name);

    if (!key)
        return NULL;

    item = PyObject_GetItem(obj, key);
    /* The temporary key object is no longer needed. */
    Py_DECREF(key);
    return item;
}
/* Iterator over the ".attr" and "[item]" parts of a field name; advanced
by FieldNameIterator_next(). */
typedef struct {
/* the entire string we're parsing. we assume that someone else
is managing its lifetime, and that it will exist for the
lifetime of the iterator. can be empty */
SubString str;
/* pointer to where we are inside field_name */
STRINGLIB_CHAR *ptr;
} FieldNameIterator;
/* Point the iterator at the len characters starting at ptr and position
   it at the beginning.  Always returns 1. */
static int
FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
                       Py_ssize_t len)
{
    SubString *whole = &self->str;

    SubString_init(whole, ptr, len);
    self->ptr = whole->ptr;
    return 1;
}
/* Consume an attribute name: everything from the current position up to,
   but not including, the next '.' or '[' (or the end of the string).
   The range is stored in *name.  Always returns 1. */
static int
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
{
    name->ptr = self->ptr;

    /* Stop in front of the delimiter so that it is seen next time. */
    while (self->ptr < self->str.end) {
        STRINGLIB_CHAR ch = *self->ptr;

        if (ch == '[' || ch == '.')
            break;
        self->ptr++;
    }

    /* end of string is okay */
    name->end = self->ptr;
    return 1;
}
/* Consume an item key: everything from the current position up to the
   next ']', which is consumed as well but left out of *name.
   Returns 1 on success, or 0 with ValueError set when no ']' is found. */
static int
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
{
    int bracket_seen = 0;

    name->ptr = self->ptr;

    /* Advance until the ']' has been stepped over. */
    while (self->ptr < self->str.end) {
        if (*self->ptr++ == ']') {
            bracket_seen = 1;
            break;
        }
    }

    /* make sure we ended with a ']' */
    if (!bracket_seen) {
        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
        return 0;
    }

    /* don't include the ']' */
    name->end = self->ptr - 1;
    return 1;
}
/* Fetch the next ".attr" or "[item]" part of the field name.

   Returns 0 on error, 1 on non-error termination (end of input), and 2
   if it returns a value.  When a value is returned, *is_attribute says
   whether it was an attribute lookup, *name holds its text, and
   *name_idx holds the item's integer value, or -1 for attributes and
   for items that are not integers. */
static int
FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
                       Py_ssize_t *name_idx, SubString *name)
{
    STRINGLIB_CHAR lead;

    /* check at end of input */
    if (self->ptr >= self->str.end)
        return 1;

    lead = *self->ptr++;
    if (lead == '.') {
        *is_attribute = 1;
        if (_FieldNameIterator_attr(self, name) == 0)
            return 0;
        *name_idx = -1;
    }
    else if (lead == '[') {
        *is_attribute = 0;
        if (_FieldNameIterator_item(self, name) == 0)
            return 0;
        /* -1 without an exception just means "not an integer". */
        *name_idx = get_integer(name);
        if (*name_idx == -1 && PyErr_Occurred())
            return 0;
    }
    else {
        /* Invalid character follows ']' */
        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
                        "follow ']' in format field specifier");
        return 0;
    }

    /* empty string is an error */
    if (name->ptr == name->end) {
        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
        return 0;
    }

    return 2;
}
/* input:  field_name
   output: 'first' points to the part before the first '[' or '.'
           'first_idx' is -1 if 'first' is not an integer, otherwise
           it's the value of first converted to an integer
           'rest' is an iterator to return the rest
   'auto_number' may be NULL; when it is given, it is updated and checked
   for fields that use numeric indexing, and an empty 'first' receives
   the next automatic field number in 'first_idx'.
   Returns 1 on success and 0 on error (with an exception set).
*/
static int
field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
                 Py_ssize_t *first_idx, FieldNameIterator *rest,
                 AutoNumber *auto_number)
{
    STRINGLIB_CHAR *split = ptr;
    STRINGLIB_CHAR *end = ptr + len;
    int field_name_is_empty;
    int using_numeric_index;

    /* Find the part up until the first '.' or '['; the delimiter itself
       is left for the "rest" iterator. */
    while (split < end && *split != '[' && *split != '.')
        split++;

    /* set up the return values */
    SubString_init(first, ptr, split - ptr);
    FieldNameIterator_init(rest, split, end - split);

    /* see if "first" is an integer, in which case it's used as an index */
    *first_idx = get_integer(first);
    if (*first_idx == -1 && PyErr_Occurred())
        return 0;

    field_name_is_empty = first->ptr >= first->end;

    /* If the field name is omitted or if we have a numeric index
       specified, then we're doing numeric indexing into args. */
    using_numeric_index = field_name_is_empty || *first_idx != -1;

    /* We always get here exactly one time for each field we're
       processing. And we get here in field order (counting by left
       braces). So this is the perfect place to handle automatic field
       numbering if the field name is omitted. */

    /* Auto-numbering is not needed if we're called from string.Format
       routines, because it's handled in that class by itself. */
    if (auto_number == NULL)
        return 1;

    if (using_numeric_index) {
        /* Initialize our auto numbering state if this is the first
           time we're either auto-numbering or manually numbering. */
        if (auto_number->an_state == ANS_INIT) {
            if (field_name_is_empty)
                auto_number->an_state = ANS_AUTO;
            else
                auto_number->an_state = ANS_MANUAL;
        }

        /* Make sure our state is consistent with what we're doing
           this time through. */
        if (autonumber_state_error(auto_number->an_state,
                                   field_name_is_empty))
            return 0;
    }

    /* Zero length field means we want to do auto-numbering of the
       fields. */
    if (field_name_is_empty) {
        *first_idx = auto_number->an_field_number;
        auto_number->an_field_number++;
    }

    return 1;
}
/*
get_field_object returns the object inside {}, before the
format_spec. It handles getindex and getattr lookups and consumes
the entire input string.
The first part of the field name is looked up in kwargs (when it is not
an integer) or in args (by index); every following ".attr" or "[item]"
part is then applied to the result in turn.
Returns a new reference, or NULL with an exception set.
*/
static PyObject *
get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
AutoNumber *auto_number)
{
PyObject *obj = NULL;
int ok;
int is_attribute;
SubString name;
SubString first;
Py_ssize_t index;
FieldNameIterator rest;
if (!field_name_split(input->ptr, input->end - input->ptr, &first,
&index, &rest, auto_number)) {
goto error;
}
if (index == -1) {
/* look up in kwargs */
PyObject *key = SubString_new_object(&first);
if (key == NULL)
goto error;
/* A missing kwargs dict or a missing key is reported as KeyError(key). */
if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
PyErr_SetObject(PyExc_KeyError, key);
Py_DECREF(key);
goto error;
}
Py_DECREF(key);
/* obj is INCREF'd here so that both branches leave us owning a
reference to it. */
Py_INCREF(obj);
}
else {
/* look up in args */
obj = PySequence_GetItem(args, index);
if (obj == NULL)
goto error;
}
/* iterate over the rest of the field_name */
while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
&name)) == 2) {
PyObject *tmp;
if (is_attribute)
/* getattr lookup "." */
tmp = getattr(obj, &name);
else
/* getitem lookup "[]" */
if (index == -1)
tmp = getitem_str(obj, &name);
else
if (PySequence_Check(obj))
tmp = getitem_sequence(obj, index);
else
/* not a sequence */
tmp = getitem_idx(obj, index);
if (tmp == NULL)
goto error;
/* assign to obj */
Py_DECREF(obj);
obj = tmp;
}
/* end of iterator, this is the non-error case */
if (ok == 1)
return obj;
/* ok == 0 (iterator error) falls through to the cleanup below. */
error:
Py_XDECREF(obj);
return NULL;
}
/************************************************************************/
/***************** Field rendering functions **************************/
/************************************************************************/
/*
    render_field() formats a single field object according to format_spec
    and hands the resulting characters to output_data() for `output`.

    For the exact built-in types checked below, the matching
    *_FormatAdvanced function is called directly with the raw format_spec
    characters; for everything else a format_spec object is built and
    PyObject_Format() is used.

    Returns 0 on failure; otherwise returns whatever output_data() returns.
*/
static int
render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
{
    PyObject *(*fast_path)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
    PyObject *spec_obj = NULL;
    PyObject *result = NULL;
    STRINGLIB_CHAR *spec_chars = format_spec->ptr;
    Py_ssize_t spec_len = 0;
    int status = 0;

    /* an unset format_spec (NULL ptr) is treated as zero length */
    if (spec_chars != NULL)
        spec_len = format_spec->end - format_spec->ptr;

    /* Pick a direct formatter when the type is known exactly, so that the
       __format__ lookup can be skipped. */
#if STRINGLIB_IS_UNICODE
    if (PyUnicode_CheckExact(fieldobj))
        fast_path = _PyUnicode_FormatAdvanced;
    /* int, long and float are deliberately not handled here: when this
       code is compiled for unicode, their formatters expect str
       format_spec arguments. */
#else
    if (PyString_CheckExact(fieldobj))
        fast_path = _PyBytes_FormatAdvanced;
    else if (PyInt_CheckExact(fieldobj))
        fast_path = _PyInt_FormatAdvanced;
    else if (PyLong_CheckExact(fieldobj))
        fast_path = _PyLong_FormatAdvanced;
    else if (PyFloat_CheckExact(fieldobj))
        fast_path = _PyFloat_FormatAdvanced;
#endif

    if (fast_path != NULL) {
        result = fast_path(fieldobj, spec_chars, spec_len);
    }
    else {
        /* __format__ wants the spec as a string/unicode object */
        spec_obj = STRINGLIB_NEW(spec_chars, spec_len);
        if (spec_obj == NULL)
            goto cleanup;
        result = PyObject_Format(fieldobj, spec_obj);
    }
    if (result == NULL)
        goto cleanup;

#if PY_VERSION_HEX >= 0x03000000
    assert(PyUnicode_Check(result));
#else
    assert(PyString_Check(result) || PyUnicode_Check(result));
    /* Coerce the result to the string type this file is compiled for;
       e.g. we may be str while the result is unicode. */
    {
        PyObject *coerced = STRINGLIB_TOSTR(result);
        if (coerced == NULL)
            goto cleanup;
        Py_DECREF(result);
        result = coerced;
    }
#endif

    status = output_data(output,
                         STRINGLIB_STR(result), STRINGLIB_LEN(result));

cleanup:
    Py_XDECREF(spec_obj);
    Py_XDECREF(result);
    return status;
}
/* Split the text between '{' and '}' into field_name, an optional
   one-character conversion (after '!') and an optional format_spec
   (after ':').  `str->ptr` is advanced while scanning.

   Returns 1 on success, 0 with ValueError set on a malformed conversion.
   A zero-length field name is accepted here; it is dealt with later by
   field_name_split. */
static int
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
            STRINGLIB_CHAR *conversion)
{
    STRINGLIB_CHAR ch = 0;

    /* defaults for the optional parts */
    *conversion = '\0';
    SubString_init(format_spec, NULL, 0);

    /* the field name runs up to the first ':' or '!', or to the end */
    field_name->ptr = str->ptr;
    while (str->ptr < str->end) {
        ch = *(str->ptr++);
        if (ch == ':' || ch == '!')
            break;
    }

    if (ch != '!' && ch != ':') {
        /* ran off the end: no conversion and no format_spec */
        field_name->end = str->ptr;
        return 1;
    }

    /* exclude the delimiter itself from the field name */
    field_name->end = str->ptr - 1;

    /* everything after the delimiter is, for now, the format_spec */
    format_spec->ptr = str->ptr;
    format_spec->end = str->end;

    if (ch == ':')
        return 1;

    /* '!' must be followed by exactly one conversion character ... */
    if (format_spec->ptr >= format_spec->end) {
        PyErr_SetString(PyExc_ValueError,
                        "end of format while looking for conversion "
                        "specifier");
        return 0;
    }
    *conversion = *(format_spec->ptr++);

    /* ... and, if anything follows that, it has to start with ':' */
    if (format_spec->ptr < format_spec->end) {
        ch = *(format_spec->ptr++);
        if (ch != ':') {
            PyErr_SetString(PyExc_ValueError,
                            "expected ':' after format specifier");
            return 0;
        }
    }
    return 1;
}
/************************************************************************/
/******* Output string allocation and escape-to-markup processing ******/
/************************************************************************/
/* MarkupIterator breaks the string into pieces of either literal
text, or things inside {} that need to be marked up. It is
designed to make it easy to wrap a Python iterator around it, for
use with the Formatter class.

The only state is `str`: the not-yet-consumed remainder of the format
string. MarkupIterator_next advances str.ptr as it scans. */
typedef struct {
SubString str;
} MarkupIterator;
/* Point the iterator at the `len` characters starting at `ptr`.
   Always returns 1. */
static int
MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
{
SubString_init(&self->str, ptr, len);
return 1;
}
/* returns 0 on error, 1 on non-error termination, and 2 if it got a
string (or something to be expanded).

On a return of 2:
  - literal holds the literal text preceding the field (possibly empty);
  - *field_present is 1 if a {...} replacement field was parsed, in which
    case field_name, format_spec and *conversion are filled in by
    parse_field;
  - *format_spec_needs_expanding is 1 if a nested '{' was seen inside the
    field.
All output SubStrings point into the iterator's input; nothing is
allocated here. On a return of 0 a ValueError has been set. */
static int
MarkupIterator_next(MarkupIterator *self, SubString *literal,
int *field_present, SubString *field_name,
SubString *format_spec, STRINGLIB_CHAR *conversion,
int *format_spec_needs_expanding)
{
int at_end;
STRINGLIB_CHAR c = 0;
STRINGLIB_CHAR *start;
int count;
Py_ssize_t len;
int markup_follows = 0;
/* initialize all of the output variables */
SubString_init(literal, NULL, 0);
SubString_init(field_name, NULL, 0);
SubString_init(format_spec, NULL, 0);
*conversion = '\0';
*format_spec_needs_expanding = 0;
*field_present = 0;
/* No more input, end of iterator. This is the normal exit
path. */
if (self->str.ptr >= self->str.end)
return 1;
start = self->str.ptr;
/* First read any literal text. Read until the end of string, an
escaped '{' or '}', or an unescaped '{'. In order to never
allocate memory and so I can just pass pointers around, if
there's an escaped '{' or '}' then we'll return the literal
including the brace, but no format object. The next time
through, we'll return the rest of the literal, skipping past
the second consecutive brace. */
while (self->str.ptr < self->str.end) {
switch (c = *(self->str.ptr++)) {
case '{':
case '}':
markup_follows = 1;
break;
default:
continue;
}
/* only reached after a brace: leave the scanning loop */
break;
}
/* str.ptr is now one past the last character read; c is that character */
at_end = self->str.ptr >= self->str.end;
len = self->str.ptr - start;
/* a '}' is only legal here as the first half of an escaped "}}" */
if ((c == '}') && (at_end || (c != *self->str.ptr))) {
PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
"in format string");
return 0;
}
/* a '{' as the very last character can neither be escaped nor closed */
if (at_end && c == '{') {
PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
"in format string");
return 0;
}
if (!at_end) {
if (c == *self->str.ptr) {
/* escaped } or {, skip it in the input. there is no
markup object following us, just this literal text */
self->str.ptr++;
markup_follows = 0;
}
else
/* the character just read is excluded from the literal text */
len--;
}
/* record the literal text */
literal->ptr = start;
literal->end = start + len;
if (!markup_follows)
return 2;
/* this is markup, find the end of the string by counting nested
braces. note that this prohibits escaped braces, so that
format_specs cannot have braces in them. */
*field_present = 1;
count = 1;
start = self->str.ptr;
/* we know we can't have a zero length string, so don't worry
about that case */
while (self->str.ptr < self->str.end) {
switch (c = *(self->str.ptr++)) {
case '{':
/* the format spec needs to be recursively expanded.
this is an optimization, and not strictly needed */
*format_spec_needs_expanding = 1;
count++;
break;
case '}':
count--;
if (count <= 0) {
/* we're done. parse and get out */
SubString s;
/* s covers the field body, excluding the closing '}' */
SubString_init(&s, start, self->str.ptr - 1 - start);
if (parse_field(&s, field_name, format_spec, conversion) == 0)
return 0;
/* success */
return 2;
}
break;
}
}
/* end of string while searching for matching '}' */
PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
return 0;
}
/* Apply a "!r" or "!s" conversion to obj and return the new object.
   Any other conversion character sets ValueError and returns NULL. */
static PyObject *
do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
{
    /* XXX in pre-3.0, do we need to convert this to unicode, since it
       might have returned a string? */
    if (conversion == 'r')
        return PyObject_Repr(obj);
    if (conversion == 's')
        return STRINGLIB_TOSTR(obj);

    if (conversion > 32 && conversion < 127) {
        /* Printable ASCII: the cast to char is safe (assuming the
           execution character set is an ASCII superset). */
        PyErr_Format(PyExc_ValueError,
                     "Unknown conversion specifier %c",
                     (char)conversion);
    }
    else {
        /* everything else is reported as a hex escape */
        PyErr_Format(PyExc_ValueError,
                     "Unknown conversion specifier \\x%x",
                     (unsigned int)conversion);
    }
    return NULL;
}
/* Handle one replacement field of the form

       {field_name!conversion:format_spec}

   by looking up the object, applying the conversion (if any), and
   rendering it into output.

   format_spec_needs_expanding is an optimization flag: when it is false
   the format_spec is used as is, otherwise it is first expanded
   recursively through build_string.

   field_name may be zero length, in which case automatic field
   numbering is in effect.

   Returns 1 on success and 0 on failure.
*/
static int
output_markup(SubString *field_name, SubString *format_spec,
              int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
              OutputString *output, PyObject *args, PyObject *kwargs,
              int recursion_depth, AutoNumber *auto_number)
{
    PyObject *value;
    PyObject *spec_holder = NULL;
    SubString expanded_spec;
    SubString *spec = format_spec;
    int status = 0;

    /* resolve the field name to an object */
    value = get_field_object(field_name, args, kwargs, auto_number);
    if (value == NULL)
        goto finish;

    if (conversion != '\0') {
        PyObject *converted = do_conversion(value, conversion);
        if (converted == NULL)
            goto finish;
        /* the converted object replaces the original one */
        Py_DECREF(value);
        value = converted;
    }

    /* if needed, recursively compute the format_spec */
    if (format_spec_needs_expanding) {
        spec_holder = build_string(format_spec, args, kwargs,
                                   recursion_depth - 1, auto_number);
        if (spec_holder == NULL)
            goto finish;
        /* expanded_spec points into spec_holder, so spec_holder has to
           stay alive until render_field has returned */
        SubString_init(&expanded_spec,
                       STRINGLIB_STR(spec_holder), STRINGLIB_LEN(spec_holder));
        spec = &expanded_spec;
    }

    if (render_field(value, spec, output) != 0)
        status = 1;

finish:
    Py_XDECREF(value);
    Py_XDECREF(spec_holder);
    return status;
}
/*
    do_markup drives the format() method: it steps through the format
    string with a MarkupIterator, copies each piece of literal text to
    the output, and lets output_markup render each replacement field.

    Returns 0 on error; otherwise returns the final MarkupIterator_next
    status.
*/
static int
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
          OutputString *output, int recursion_depth, AutoNumber *auto_number)
{
    MarkupIterator iter;
    SubString literal;
    SubString field_name;
    SubString format_spec;
    STRINGLIB_CHAR conversion;
    int field_present;
    int format_spec_needs_expanding;
    int status;

    MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);

    for (;;) {
        status = MarkupIterator_next(&iter, &literal, &field_present,
                                     &field_name, &format_spec,
                                     &conversion,
                                     &format_spec_needs_expanding);
        if (status != 2)
            return status;

        /* literal text first, then the field that follows it (if any) */
        if (!output_data(output, literal.ptr, literal.end - literal.ptr))
            return 0;
        if (field_present &&
            !output_markup(&field_name, &format_spec,
                           format_spec_needs_expanding, conversion, output,
                           args, kwargs, recursion_depth, auto_number))
            return 0;
    }
}
/*
    build_string sets up the output buffer, lets do_markup fill it, and
    trims the buffer to the number of characters actually written.

    Returns a new string object, or NULL with an exception set.
*/
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number)
{
    OutputString output;
    PyObject *built = NULL;
    Py_ssize_t used;

    /* keeps the cleanup code valid on every path */
    output.obj = NULL;

    /* refuse to nest any deeper */
    if (recursion_depth <= 0) {
        PyErr_SetString(PyExc_ValueError,
                        "Max string recursion exceeded");
        goto cleanup;
    }

    /* start with room for the format string itself plus one size
       increment */
    if (!output_initialize(&output,
                           input->end - input->ptr +
                           INITIAL_SIZE_INCREMENT))
        goto cleanup;

    if (!do_markup(input, args, kwargs, &output, recursion_depth,
                   auto_number))
        goto cleanup;

    /* shrink the buffer to what was really produced */
    used = output.ptr - STRINGLIB_STR(output.obj);
    if (STRINGLIB_RESIZE(&output.obj, used) < 0)
        goto cleanup;

    /* hand the buffer over to the caller */
    built = output.obj;
    output.obj = NULL;

cleanup:
    Py_XDECREF(output.obj);
    return built;
}
/************************************************************************/
/*********** main routine ***********************************************/
/************************************************************************/
/* this is the main entry point: format `self` (the format string) with
   the positional `args` and keyword `kwargs`, returning whatever
   build_string returns. */
// Pyston change: changed to non-static
/* static */ PyObject *
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
{
SubString input;
/* PEP 3101 says only 2 levels, so that
"{0:{1}}".format('abc', 's') # works
"{0:{1:{2}}}".format('abc', 's', '') # fails
*/
int recursion_depth = 2;
/* auto-numbering state is shared by the whole format call, including
   nested format_specs, since it is passed down through build_string */
AutoNumber auto_number;
AutoNumber_Init(&auto_number);
SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
return build_string(&input, args, kwargs, recursion_depth, &auto_number);
}
/************************************************************************/
/*********** formatteriterator ******************************************/
/************************************************************************/
/* This is used to implement string.Formatter.vparse(). It exists so
Formatter can share code with the built in unicode.format() method.
It's really just a wrapper around MarkupIterator that is callable
from Python.

str       - the format string object; formatter_parser stores a new
            reference here and formatteriter_dealloc releases it.
it_markup - iterator state pointing into str's character buffer. */
typedef struct {
PyObject_HEAD
STRINGLIB_OBJECT *str;
MarkupIterator it_markup;
} formatteriterobject;
/* tp_dealloc for formatteriterator: release the reference to the format
   string, then free the iterator object itself. */
static void
formatteriter_dealloc(formatteriterobject *it)
{
Py_XDECREF(it->str);
PyObject_FREE(it);
}
/* tp_iternext for formatteriterator.  Returns a tuple:
   (literal, field_name, format_spec, conversion)

   literal is any literal text to output.  might be zero length
   field_name is the string before the ':'.  might be None
   format_spec is the string after the ':'.  might be None
   conversion is either None, or the string after the '!'

   Returns NULL when the underlying MarkupIterator is exhausted or has
   reported an error.
*/
static PyObject *
formatteriter_next(formatteriterobject *it)
{
    SubString literal;
    SubString field_name;
    SubString format_spec;
    STRINGLIB_CHAR conversion;
    int format_spec_needs_expanding;
    int field_present;
    PyObject *literal_str = NULL;
    PyObject *field_name_str = NULL;
    PyObject *format_spec_str = NULL;
    PyObject *conversion_str = NULL;
    PyObject *tuple = NULL;
    int result;

    result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
                                 &field_name, &format_spec, &conversion,
                                 &format_spec_needs_expanding);

    /* every SubString above points into it->str, so none of them needs
       any memory management */
    assert(0 <= result && result <= 2);
    if (result == 0 || result == 1) {
        /* 0: the error is already set; 1: nothing left to iterate */
        return NULL;
    }

    literal_str = SubString_new_object(&literal);
    if (literal_str == NULL)
        goto done;

    field_name_str = SubString_new_object(&field_name);
    if (field_name_str == NULL)
        goto done;

    /* when a field is present, format_spec is produced by the
       "or_empty" constructor; otherwise by the plain one */
    if (field_present)
        format_spec_str = SubString_new_object_or_empty(&format_spec);
    else
        format_spec_str = SubString_new_object(&format_spec);
    if (format_spec_str == NULL)
        goto done;

    /* no conversion given -> None, else a one-character string */
    if (conversion == '\0') {
        conversion_str = Py_None;
        Py_INCREF(conversion_str);
    }
    else {
        conversion_str = STRINGLIB_NEW(&conversion, 1);
    }
    if (conversion_str == NULL)
        goto done;

    tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
                         conversion_str);

done:
    Py_XDECREF(literal_str);
    Py_XDECREF(field_name_str);
    Py_XDECREF(format_spec_str);
    Py_XDECREF(conversion_str);
    return tuple;
}
/* Method table for formatteriterator: empty apart from the terminating
   sentinel entry. */
static PyMethodDef formatteriter_methods[] = {
{NULL, NULL} /* sentinel */
};
/* Type object for the iterator returned by formatter_parser.  It is its
   own iterator (PyObject_SelfIter) and yields the tuples produced by
   formatteriter_next. */
static PyTypeObject PyFormatterIter_Type = {
// Pyston change:
PyVarObject_HEAD_INIT(NULL /* was &PyType_Type */, 0)
"formatteriterator", /* tp_name */
sizeof(formatteriterobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)formatteriter_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
(iternextfunc)formatteriter_next, /* tp_iternext */
formatteriter_methods, /* tp_methods */
0, /* tp_members */
};
/* formatter_parser is used to implement string.Formatter.vformat.
   It returns a new formatteriterator over `self` that yields tuples
   describing the parsed elements.  It is a wrapper around
   stringlib/string_format.h's MarkupIterator. */
static PyObject *
formatter_parser(STRINGLIB_OBJECT *self)
{
    formatteriterobject *iter =
        PyObject_New(formatteriterobject, &PyFormatterIter_Type);

    if (iter == NULL)
        return NULL;

    /* the iterator holds its own reference to the string, which keeps
       the buffer scanned by the MarkupIterator alive */
    Py_INCREF(self);
    iter->str = self;

    MarkupIterator_init(&iter->it_markup,
                        STRINGLIB_STR(self),
                        STRINGLIB_LEN(self));

    return (PyObject *)iter;
}
/************************************************************************/
/*********** fieldnameiterator ******************************************/
/************************************************************************/
/* This is used to implement string.Formatter.vparse(). It parses the
field name into attribute and item values. It's a Python-callable
wrapper around FieldNameIterator.

str      - the field-name string object; formatter_field_name_split
           stores a new reference here and fieldnameiter_dealloc
           releases it.
it_field - iterator state filled in by field_name_split. */
typedef struct {
PyObject_HEAD
STRINGLIB_OBJECT *str;
FieldNameIterator it_field;
} fieldnameiterobject;
/* tp_dealloc for fieldnameiterator: release the reference to the field
   name string, then free the iterator object itself. */
static void
fieldnameiter_dealloc(fieldnameiterobject *it)
{
Py_XDECREF(it->str);
PyObject_FREE(it);
}
/* tp_iternext for fieldnameiterator.  Returns a tuple:
   (is_attr, value)

   is_attr is true if we used attribute syntax (e.g., '.foo')
           false if we used index syntax (e.g., '[foo]')
   value is an integer or string

   Returns NULL when the underlying FieldNameIterator is exhausted or has
   reported an error.
*/
static PyObject *
fieldnameiter_next(fieldnameiterobject *it)
{
    SubString name;
    Py_ssize_t idx;
    int is_attr;
    int status;
    PyObject *is_attr_obj = NULL;
    PyObject *value = NULL;
    PyObject *pair = NULL;

    status = FieldNameIterator_next(&it->it_field, &is_attr,
                                    &idx, &name);
    if (status == 0 || status == 1) {
        /* 0: the error is already set; 1: nothing left to iterate */
        return NULL;
    }

    is_attr_obj = PyBool_FromLong(is_attr);
    if (is_attr_obj == NULL)
        goto done;

    /* an idx of -1 means the component is a name rather than a number */
    if (idx == -1)
        value = SubString_new_object(&name);
    else
        value = PyLong_FromSsize_t(idx);
    if (value == NULL)
        goto done;

    pair = PyTuple_Pack(2, is_attr_obj, value);

done:
    Py_XDECREF(is_attr_obj);
    Py_XDECREF(value);
    return pair;
}
/* Method table for fieldnameiterator: empty apart from the terminating
   sentinel entry. */
static PyMethodDef fieldnameiter_methods[] = {
{NULL, NULL} /* sentinel */
};
/* Type object for the iterator returned (as the second tuple element) by
   formatter_field_name_split.  It is its own iterator
   (PyObject_SelfIter) and yields the tuples produced by
   fieldnameiter_next. */
static PyTypeObject PyFieldNameIter_Type = {
// Pyston change:
PyVarObject_HEAD_INIT(NULL /* was &PyType_Type */, 0)
"fieldnameiterator", /* tp_name */
sizeof(fieldnameiterobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)fieldnameiter_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
(iternextfunc)fieldnameiter_next, /* tp_iternext */
fieldnameiter_methods, /* tp_methods */
0 /* tp_members */};
/* formatter_field_name_split is used to implement
   string.Formatter.vformat.  It takes a PEP 3101 "field name" and
   returns a tuple of (first, rest): "first" is the part before the
   first '.' or '[' (an integer when it is numeric, otherwise a string),
   and "rest" is a fieldnameiterator over the remainder of the field
   name.  It is a wrapper around stringlib/string_format.h's
   field_name_split.  Returns NULL on failure. */
static PyObject *
formatter_field_name_split(STRINGLIB_OBJECT *self)
{
    SubString head;
    Py_ssize_t head_index;
    PyObject *head_obj = NULL;
    PyObject *pair = NULL;
    fieldnameiterobject *iter;

    iter = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
    if (iter == NULL)
        return NULL;

    /* the iterator holds its own reference to the string, purely to
       keep the field name alive while it is being iterated */
    Py_INCREF(self);
    iter->str = self;

    /* auto_number is passed as NULL; per the original note, "first" is
       then returned as an empty string for an empty field name */
    if (field_name_split(STRINGLIB_STR(self),
                         STRINGLIB_LEN(self),
                         &head, &head_index, &iter->it_field, NULL)) {
        /* "first" becomes an integer if possible, else a string */
        if (head_index != -1)
            head_obj = PyLong_FromSsize_t(head_index);
        else
            head_obj = SubString_new_object(&head);

        if (head_obj != NULL)
            pair = PyTuple_Pack(2, head_obj, iter);
    }

    /* the tuple (if any) now owns its own references to both items */
    Py_XDECREF(iter);
    Py_XDECREF(head_obj);
    return pair;
}
#ifndef STRINGLIB_STRINGDEFS_H
#define STRINGLIB_STRINGDEFS_H

/* Configuration of the stringlib templates for the 8-bit `str` type:
   each STRINGLIB_* name maps a generic operation onto its PyString /
   plain-char implementation. */

/* this is sort of a hack.  there's at least one place (formatting
   floats) where some stringlib code takes a different path if it's
   compiled as unicode. */
#define STRINGLIB_IS_UNICODE     0

#define STRINGLIB_OBJECT         PyStringObject
#define STRINGLIB_CHAR           char
#define STRINGLIB_TYPE_NAME      "string"
#define STRINGLIB_PARSE_CODE     "S"
#define STRINGLIB_EMPTY          nullstring
#define STRINGLIB_ISSPACE        Py_ISSPACE

/* The three function-like macros below parenthesize their argument so
   that an expression argument (e.g. one using ?:) is classified as a
   whole.  They still evaluate the argument more than once, so do not
   pass expressions with side effects. */
#define STRINGLIB_ISLINEBREAK(x) (((x) == '\n') || ((x) == '\r'))
#define STRINGLIB_ISDECIMAL(x)   (((x) >= '0') && ((x) <= '9'))
/* digit value of x, or -1 when x is not an ASCII decimal digit */
#define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? ((x) - '0') : -1)

#define STRINGLIB_TOUPPER        Py_TOUPPER
#define STRINGLIB_TOLOWER        Py_TOLOWER
#define STRINGLIB_FILL           memset
#define STRINGLIB_STR            PyString_AS_STRING
#define STRINGLIB_LEN            PyString_GET_SIZE
#define STRINGLIB_NEW            PyString_FromStringAndSize
#define STRINGLIB_RESIZE         _PyString_Resize
#define STRINGLIB_CHECK          PyString_Check
#define STRINGLIB_CHECK_EXACT    PyString_CheckExact
#define STRINGLIB_TOSTR          PyObject_Str
#define STRINGLIB_GROUPING       _PyString_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale

#define STRINGLIB_WANT_CONTAINS_OBJ 1

#endif /* !STRINGLIB_STRINGDEFS_H */
/* NOTE: this API is -ONLY- for use with single byte character strings. */
/* Do not use it with Unicode. */
/* the more complicated methods. parts of these should be pulled out into the
shared code in bytes_methods.c to cut down on duplicate code bloat. */
PyDoc_STRVAR(expandtabs__doc__,
"B.expandtabs([tabsize]) -> copy of B\n\
\n\
Return a copy of B where all tab characters are expanded using spaces.\n\
If tabsize is not given, a tab size of 8 characters is assumed.");

/* Implementation of expandtabs(): two passes over self, the first to
   size the result (with overflow checks), the second to fill it.
   A tabsize <= 0 drops tab characters altogether.  Returns a new
   object, or NULL with an exception set. */
static PyObject*
stringlib_expandtabs(PyObject *self, PyObject *args)
{
    const char *src, *src_end;
    char *dst;
    Py_ssize_t finished;    /* length of all completed lines */
    Py_ssize_t column;      /* length of the line currently being scanned */
    PyObject *expanded;
    int tabsize = 8;

    if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
        return NULL;

    /* Pass 1: work out how long the result is going to be */
    finished = 0;
    column = 0;
    src_end = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
    for (src = STRINGLIB_STR(self); src < src_end; src++) {
        if (*src == '\t') {
            if (tabsize > 0) {
                Py_ssize_t incr = tabsize - (column % tabsize);
                if (column > PY_SSIZE_T_MAX - incr)
                    goto overflow;
                column += incr;
            }
            continue;
        }

        if (column > PY_SSIZE_T_MAX - 1)
            goto overflow;
        column++;
        if (*src == '\n' || *src == '\r') {
            /* line finished: bank its length and restart the column */
            if (finished > PY_SSIZE_T_MAX - column)
                goto overflow;
            finished += column;
            column = 0;
        }
    }
    if (finished > PY_SSIZE_T_MAX - column)
        goto overflow;

    /* Pass 2: allocate the result and copy/expand into it */
    expanded = STRINGLIB_NEW(NULL, finished + column);
    if (!expanded)
        return NULL;

    column = 0;
    dst = STRINGLIB_STR(expanded);
    for (src = STRINGLIB_STR(self); src < src_end; src++) {
        if (*src == '\t') {
            if (tabsize > 0) {
                Py_ssize_t spaces = tabsize - (column % tabsize);
                column += spaces;
                while (spaces--)
                    *dst++ = ' ';
            }
        }
        else {
            column++;
            *dst++ = *src;
            if (*src == '\n' || *src == '\r')
                column = 0;
        }
    }

    return expanded;

overflow:
    PyErr_SetString(PyExc_OverflowError, "result too long");
    return NULL;
}
/* Return self with `left` fill characters in front of it and `right`
   fill characters after it.  Negative counts are treated as zero.  When
   no padding is needed and self is exactly the base type, self itself is
   returned (or a copy, for mutable types).  Returns NULL if the
   allocation fails. */
Py_LOCAL_INLINE(PyObject *)
pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
{
    PyObject *padded;

    left = (left < 0) ? 0 : left;
    right = (right < 0) ? 0 : right;

    if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) {
#if STRINGLIB_MUTABLE
        /* We're defined as returning a copy;  If the object is mutable
         * that means we must make an identical copy. */
        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
#else
        Py_INCREF(self);
        return (PyObject *)self;
#endif /* STRINGLIB_MUTABLE */
    }

    padded = STRINGLIB_NEW(NULL,
                           left + STRINGLIB_LEN(self) + right);
    if (!padded)
        return padded;

    /* layout: [left fill][copy of self][right fill] */
    if (left)
        memset(STRINGLIB_STR(padded), fill, left);
    Py_MEMCPY(STRINGLIB_STR(padded) + left,
              STRINGLIB_STR(self),
              STRINGLIB_LEN(self));
    if (right)
        memset(STRINGLIB_STR(padded) + left + STRINGLIB_LEN(self),
               fill, right);

    return padded;
}
PyDoc_STRVAR(ljust__doc__,
"B.ljust(width[, fillchar]) -> copy of B\n"
"\n"
"Return B left justified in a string of length width. Padding is\n"
"done using the specified fill character (default is a space).");

/* ljust(): parse (width[, fillchar]) and pad on the right up to width. */
static PyObject *
stringlib_ljust(PyObject *self, PyObject *args)
{
    char fillchar = ' ';
    Py_ssize_t width;

    if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
        return NULL;

    /* already wide enough and exactly the base type: nothing to build */
    if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
#if STRINGLIB_MUTABLE
        /* We're defined as returning a copy;  If the object is mutable
         * that means we must make an identical copy. */
        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
#else
        Py_INCREF(self);
        return (PyObject*) self;
#endif
    }

    /* all of the missing width goes to the right-hand side */
    return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
}
PyDoc_STRVAR(rjust__doc__,
"B.rjust(width[, fillchar]) -> copy of B\n"
"\n"
"Return B right justified in a string of length width. Padding is\n"
"done using the specified fill character (default is a space)");

/* rjust(): parse (width[, fillchar]) and pad on the left up to width. */
static PyObject *
stringlib_rjust(PyObject *self, PyObject *args)
{
    char fillchar = ' ';
    Py_ssize_t width;

    if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
        return NULL;

    /* already wide enough and exactly the base type: nothing to build */
    if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
#if STRINGLIB_MUTABLE
        /* We're defined as returning a copy;  If the object is mutable
         * that means we must make an identical copy. */
        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
#else
        Py_INCREF(self);
        return (PyObject*) self;
#endif
    }

    /* all of the missing width goes to the left-hand side */
    return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
}
PyDoc_STRVAR(center__doc__,
"B.center(width[, fillchar]) -> copy of B\n"
"\n"
"Return B centered in a string of length width.  Padding is\n"
"done using the specified fill character (default is a space).");

/* center(): parse (width[, fillchar]) and split the missing width
   between the left and right sides. */
static PyObject *
stringlib_center(PyObject *self, PyObject *args)
{
    char fillchar = ' ';
    Py_ssize_t width;
    Py_ssize_t missing;
    Py_ssize_t on_left;

    if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
        return NULL;

    /* already wide enough and exactly the base type: nothing to build */
    if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
#if STRINGLIB_MUTABLE
        /* We're defined as returning a copy;  If the object is mutable
         * that means we must make an identical copy. */
        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
#else
        Py_INCREF(self);
        return (PyObject*) self;
#endif
    }

    missing = width - STRINGLIB_LEN(self);
    /* half on the left, plus one more when both the missing amount and
       the target width are odd */
    on_left = missing / 2 + (missing & width & 1);

    return pad(self, on_left, missing - on_left, fillchar);
}
PyDoc_STRVAR(zfill__doc__,
"B.zfill(width) -> copy of B\n"
"\n"
"Pad a numeric string B with zeros on the left, to fill a field\n"
"of the specified width.  B is never truncated.");

/* zfill(): left-pad with '0' up to width, keeping a leading '+' or '-'
   in front of the inserted zeros. */
static PyObject *
stringlib_zfill(PyObject *self, PyObject *args)
{
    Py_ssize_t width;
    Py_ssize_t zeros;
    PyObject *padded;
    char *chars;

    if (!PyArg_ParseTuple(args, "n:zfill", &width))
        return NULL;

    if (STRINGLIB_LEN(self) >= width) {
        /* no padding needed; subclasses still get a fresh base object */
        if (!STRINGLIB_CHECK_EXACT(self))
            return STRINGLIB_NEW(
                STRINGLIB_STR(self),
                STRINGLIB_LEN(self)
            );
#if STRINGLIB_MUTABLE
        /* We're defined as returning a copy;  If the object is mutable
         * that means we must make an identical copy. */
        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
#else
        Py_INCREF(self);
        return (PyObject*) self;
#endif
    }

    zeros = width - STRINGLIB_LEN(self);

    padded = pad(self, zeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* chars[zeros] is the first character of the original content */
    chars = STRINGLIB_STR(padded);
    if (chars[zeros] == '+' || chars[zeros] == '-') {
        /* move sign to beginning of string */
        chars[0] = chars[zeros];
        chars[zeros] = '0';
    }

    return (PyObject*) padded;
}
#ifndef STRINGLIB_UNICODEDEFS_H
#define STRINGLIB_UNICODEDEFS_H
/* this is sort of a hack. there's at least one place (formatting
floats) where some stringlib code takes a different path if it's
compiled as unicode. */
#define STRINGLIB_IS_UNICODE 1
#define STRINGLIB_OBJECT PyUnicodeObject
#define STRINGLIB_CHAR Py_UNICODE
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER
#define STRINGLIB_TOLOWER Py_UNICODE_TOLOWER
#define STRINGLIB_FILL Py_UNICODE_FILL
#define STRINGLIB_STR PyUnicode_AS_UNICODE
#define STRINGLIB_LEN PyUnicode_GET_SIZE
#define STRINGLIB_NEW PyUnicode_FromUnicode
#define STRINGLIB_RESIZE PyUnicode_Resize
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
#if PY_VERSION_HEX < 0x03000000
#define STRINGLIB_TOSTR PyObject_Unicode
#else
#define STRINGLIB_TOSTR PyObject_Str
#endif
#define STRINGLIB_WANT_CONTAINS_OBJ 1
#endif /* !STRINGLIB_UNICODEDEFS_H */
// Pyston change: this is just a shim to import stuff from stringlib/
#include "Python.h"
// Select the 8-bit str configuration of the STRINGLIB_* macros before
// pulling in the templated formatting code.
#include "stringlib/stringdefs.h"
#include "stringlib/string_format.h"
// localeutil.h is included with _Py_InsertThousandsGrouping renamed to
// the str-specific symbol.
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
#include "stringlib/localeutil.h"
// This file is originally from CPython 2.7, with modifications for Pyston
/****************************************************************
*
* The author of this software is David M. Gay.
*
* Copyright (c) 1991, 2000, 2001 by Lucent Technologies.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
*
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*
***************************************************************/
/****************************************************************
* This is dtoa.c by David M. Gay, downloaded from
* http://www.netlib.org/fp/dtoa.c on April 15, 2009 and modified for
* inclusion into the Python core by Mark E. T. Dickinson and Eric V. Smith.
*
* Please remember to check http://www.netlib.org/fp regularly (and especially
* before any Python release) for bugfixes and updates.
*
* The major modifications from Gay's original code are as follows:
*
* 0. The original code has been specialized to Python's needs by removing
* many of the #ifdef'd sections. In particular, code to support VAX and
* IBM floating-point formats, hex NaNs, hex floats, locale-aware
* treatment of the decimal point, and setting of the inexact flag have
* been removed.
*
* 1. We use PyMem_Malloc and PyMem_Free in place of malloc and free.
*
* 2. The public functions strtod, dtoa and freedtoa all now have
* a _Py_dg_ prefix.
*
* 3. Instead of assuming that PyMem_Malloc always succeeds, we thread
* PyMem_Malloc failures through the code. The functions
*
* Balloc, multadd, s2b, i2b, mult, pow5mult, lshift, diff, d2b
*
* of return type *Bigint all return NULL to indicate a malloc failure.
* Similarly, rv_alloc and nrv_alloc (return type char *) return NULL on
* failure. bigcomp now has return type int (it used to be void) and
* returns -1 on failure and 0 otherwise. _Py_dg_dtoa returns NULL
* on failure. _Py_dg_strtod indicates failure due to malloc failure
* by returning -1.0, setting errno=ENOMEM and *se to s00.
*
* 4. The static variable dtoa_result has been removed. Callers of
* _Py_dg_dtoa are expected to call _Py_dg_freedtoa to free
* the memory allocated by _Py_dg_dtoa.
*
* 5. The code has been reformatted to better fit with Python's
* C style guide (PEP 7).
*
* 6. A bug in the memory allocation has been fixed: to avoid FREEing memory
* that hasn't been MALLOC'ed, private_mem should only be used when k <=
* Kmax.
*
* 7. _Py_dg_strtod has been modified so that it doesn't accept strings with
* leading whitespace.
*
***************************************************************/
/* Please send bug reports for the original dtoa.c code to David M. Gay (dmg
* at acm dot org, with " at " changed to "@" and " dot " changed to ".").
* Please report bugs for this modified version using the Python issue tracker
* (http://bugs.python.org). */
/* On a machine with IEEE extended-precision registers, it is
* necessary to specify double-precision (53-bit) rounding precision
* before invoking strtod or dtoa. If the machine uses (the equivalent
* of) Intel 80x87 arithmetic, the call
* _control87(PC_53, MCW_PC);
* does this with many compilers. Whether this or another call is
* appropriate depends on the compiler; for this to work, it may be
* necessary to #include "float.h" or another system-dependent header
* file.
*/
/* strtod for IEEE-, VAX-, and IBM-arithmetic machines.
*
* This strtod returns a nearest machine number to the input decimal
* string (or sets errno to ERANGE). With IEEE arithmetic, ties are
* broken by the IEEE round-even rule. Otherwise ties are broken by
* biased rounding (add half and chop).
*
* Inspired loosely by William D. Clinger's paper "How to Read Floating
* Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 92-101].
*
* Modifications:
*
* 1. We only require IEEE, IBM, or VAX double-precision
* arithmetic (not IEEE double-extended).
* 2. We get by with floating-point arithmetic in a case that
* Clinger missed -- when we're computing d * 10^n
* for a small integer d and the integer n is not too
* much larger than 22 (the maximum integer k for which
* we can represent 10^k exactly), we may be able to
* compute (d*10^k) * 10^(e-k) with just one roundoff.
* 3. Rather than a bit-at-a-time adjustment of the binary
* result in the hard case, we use floating-point
* arithmetic to determine the adjustment to within
* one bit; only in really hard cases do we need to
* compute a second residual.
* 4. Because of 3., we don't need a large table of powers of 10
* for ten-to-e (just some small tables, e.g. of 10^k
* for 0 <= k <= 22).
*/
/* Linking of Python's #defines to Gay's #defines starts here. */
#include "Python.h"
/* if PY_NO_SHORT_FLOAT_REPR is defined, then don't even try to compile
the following code */
#ifndef PY_NO_SHORT_FLOAT_REPR
#include "float.h"
/* Route the allocator names used by the Bigint code to Python's allocator. */
#define MALLOC PyMem_Malloc
#define FREE PyMem_Free
/* This code should also work for ARM mixed-endian format on little-endian
machines, where doubles have byte order 45670123 (in increasing address
order, 0 being the least significant byte). */
/* Translate Python's DOUBLE_IS_* configuration macros into the byte-order
macros (IEEE_8087 / IEEE_MC68k) that the code below tests. */
#ifdef DOUBLE_IS_LITTLE_ENDIAN_IEEE754
# define IEEE_8087
#endif
#if defined(DOUBLE_IS_BIG_ENDIAN_IEEE754) || \
defined(DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754)
# define IEEE_MC68k
#endif
/* Refuse to build unless exactly one of the two layouts was selected. */
#if defined(IEEE_8087) + defined(IEEE_MC68k) != 1
#error "Exactly one of IEEE_8087 or IEEE_MC68k should be defined."
#endif
/* The code below assumes that the endianness of integers matches the
endianness of the two 32-bit words of a double. Check this. */
#if defined(WORDS_BIGENDIAN) && (defined(DOUBLE_IS_LITTLE_ENDIAN_IEEE754) || \
defined(DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754))
#error "doubles and ints have incompatible endianness"
#endif
#if !defined(WORDS_BIGENDIAN) && defined(DOUBLE_IS_BIG_ENDIAN_IEEE754)
#error "doubles and ints have incompatible endianness"
#endif
/* ULong / Long: exact-width 32-bit unsigned / signed integers.  These are
mandatory; the build fails if Python could not find such types. */
#if defined(HAVE_UINT32_T) && defined(HAVE_INT32_T)
typedef PY_UINT32_T ULong;
typedef PY_INT32_T Long;
#else
#error "Failed to find an exact-width 32-bit integer type"
#endif
/* ULLong: optional 64-bit unsigned integer.  When it is left undefined the
arithmetic routines below compile their "#else" branches, which work on
16-bit halves of each 32-bit word instead. */
#if defined(HAVE_UINT64_T)
#define ULLong PY_UINT64_T
#else
#undef ULLong
#endif
/* Enable this file's DEBUG consistency checks only in Py_DEBUG builds. */
#undef DEBUG
#ifdef Py_DEBUG
#define DEBUG
#endif
/* End Python #define linking */
#ifdef DEBUG
/* Bug(msg): report an internal consistency failure on stderr and terminate
   the process with exit status 1.  Only defined in DEBUG builds.
   The body is wrapped in do { ... } while (0) so that "Bug(msg);" expands to
   exactly one statement and therefore stays correct inside an unbraced
   if/else; the argument is parenthesized for the usual macro-hygiene
   reasons. */
#define Bug(x) do { fprintf(stderr, "%s\n", (x)); exit(1); } while (0)
#endif
/* Size, in bytes, of the static arena from which the pooled version of
Balloc carves small Bigints before it falls back to MALLOC.  May be
overridden at compile time. */
#ifndef PRIVATE_MEM
#define PRIVATE_MEM 2304
#endif
/* The same size expressed as a number of doubles, rounded up. */
#define PRIVATE_mem ((PRIVATE_MEM+sizeof(double)-1)/sizeof(double))
/* The arena itself (an array of doubles), plus pmem_next, a cursor to
its first unused element. */
static double private_mem[PRIVATE_mem], *pmem_next = private_mem;
#ifdef __cplusplus
extern "C" {
#endif
/* U overlays a double with its two 32-bit halves so that the bit pattern
of the double can be read and edited directly. */
typedef union { double d; ULong L[2]; } U;
/* word0(x) is the half that the exponent, fraction-top and sign masks
(Exp_mask, Frac_mask, Sign_bit) are applied to; word1(x) is the other half.
Which array index each one maps to depends on the byte order selected
above.  Both take a pointer to a U and yield an lvalue. */
#ifdef IEEE_8087
#define word0(x) (x)->L[1]
#define word1(x) (x)->L[0]
#else
#define word0(x) (x)->L[0]
#define word1(x) (x)->L[1]
#endif
/* dval(x) accesses the same storage as a double (x is a pointer to a U). */
#define dval(x) (x)->d
/* Digit-count threshold referred to by the bigcomp documentation below:
bigcomp handles hard strtod cases for inputs with more than STRTOD_DIGLIM
digits.  May be overridden at compile time. */
#ifndef STRTOD_DIGLIM
#define STRTOD_DIGLIM 40
#endif
/* maximum permitted exponent value for strtod; exponents larger than
MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP
should fit into an int. */
#ifndef MAX_ABS_EXP
#define MAX_ABS_EXP 1100000000U
#endif
/* Bound on length of pieces of input strings in _Py_dg_strtod; specifically,
this is used to bound the total number of digits ignoring leading zeros and
the number of digits that follow the decimal point. Ideally, MAX_DIGITS
should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the
exponent clipping in _Py_dg_strtod can't affect the value of the output. */
#ifndef MAX_DIGITS
#define MAX_DIGITS 1000000000U
#endif
/* Guard against trying to use the above values on unusual platforms with ints
* of width less than 32 bits.  Both checks are compile-time: the build is
* rejected outright rather than risking int overflow at run time. */
#if MAX_ABS_EXP > INT_MAX
#error "MAX_ABS_EXP should fit in an int"
#endif
#if MAX_DIGITS > INT_MAX
#error "MAX_DIGITS should fit in an int"
#endif
/* Storeinc(a,b,c): store b (truncated to 16 bits) in the high half and c
* (truncated to 16 bits) in the low half of the 32-bit word *a, then advance
* the pointer a by one word.  Which unsigned-short index is the "high" half
* depends on the byte order, hence the two variants.  Only the non-ULLong
* arithmetic paths use it.
*
* The following definition of Storeinc is appropriate for MIPS processors.
* An alternative that might be better on some machines is
* #define Storeinc(a,b,c) (*a++ = b << 16 | c & 0xffff)
*
* NOTE: the arguments are not parenthesized and a is evaluated more than
* once, so callers must pass plain variables.
*/
#if defined(IEEE_8087)
#define Storeinc(a,b,c) (((unsigned short *)a)[1] = (unsigned short)b, \
((unsigned short *)a)[0] = (unsigned short)c, a++)
#else
#define Storeinc(a,b,c) (((unsigned short *)a)[0] = (unsigned short)b, \
((unsigned short *)a)[1] = (unsigned short)c, a++)
#endif
/* #define P DBL_MANT_DIG */
/* Ten_pmax = floor(P*log(2)/log(5)) */
/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
/* Quick_max = floor((P-1)*log(FLT_RADIX)/log(10) - 1) */
/* Int_max = floor(P*log(FLT_RADIX)/log(10) - 1) */
#define Exp_shift 20
#define Exp_shift1 20
#define Exp_msk1 0x100000
#define Exp_msk11 0x100000
#define Exp_mask 0x7ff00000
#define P 53
#define Nbits 53
#define Bias 1023
#define Emax 1023
#define Emin (-1022)
#define Etiny (-1074) /* smallest denormal is 2**Etiny */
#define Exp_1 0x3ff00000
#define Exp_11 0x3ff00000
#define Ebits 11
#define Frac_mask 0xfffff
#define Frac_mask1 0xfffff
#define Ten_pmax 22
#define Bletch 0x10
#define Bndry_mask 0xfffff
#define Bndry_mask1 0xfffff
#define Sign_bit 0x80000000
#define Log2P 1
#define Tiny0 0
#define Tiny1 1
#define Quick_max 14
#define Int_max 14
#ifndef Flt_Rounds
#ifdef FLT_ROUNDS
#define Flt_Rounds FLT_ROUNDS
#else
#define Flt_Rounds 1
#endif
#endif /*Flt_Rounds*/
#define Rounding Flt_Rounds
#define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1))
#define Big1 0xffffffff
/* struct BCinfo is used to pass information from _Py_dg_strtod to bigcomp.
Fields (as described in the bigcomp documentation):
- e0: decimal exponent of the input, such that the input value equals
(integer formed by the nd significant digits) * 10**e0
- nd: total number of significant digits (at least 1)
- nd0: number of significant digits before the decimal separator; equal
to nd when there is no separator
- scale: scaling applied to the estimate to keep arithmetic away from
subnormals; either 0 or 2*P */
typedef struct BCinfo BCinfo;
struct
BCinfo {
int e0, nd, nd0, scale;
};
/* Mask selecting the low 32 bits of a wider intermediate; the ULLong
arithmetic paths use it to split a 64-bit result into word and carry. */
#define FFFFFFFF 0xffffffffUL
/* Index of the largest pooled Bigint size class: freelist[] has Kmax+1
entries and Bigints with k > Kmax bypass the pools. */
#define Kmax 7
/* struct Bigint is used to represent arbitrary-precision integers. These
integers are stored in sign-magnitude format, with the magnitude stored as
an array of base 2**32 digits. Bigints are always normalized: if b is a
Bigint then b->wds >= 1, and either b->wds == 1 or b->x[b->wds-1] is nonzero.
The Bigint fields are as follows:
- next is a header used by Balloc and Bfree to keep track of lists
of freed Bigints; it's also used for the linked list of
powers of 5 of the form 5**2**i used by pow5mult.
- k indicates which pool this Bigint was allocated from
- maxwds is the maximum number of words space was allocated for
(usually maxwds == 2**k)
- sign is 1 for negative Bigints, 0 for positive. The sign is unused
(ignored on inputs, set to 0 on outputs) in almost all operations
involving Bigints: a notable exception is the diff function, which
ignores signs on inputs but sets the sign of the output correctly.
- wds is the actual number of significant words
- x contains the vector of words (digits) for this Bigint, from least
significant (x[0]) to most significant (x[wds-1]).
Layout note: x is declared with a single element, but Balloc allocates
sizeof(Bigint) plus room for maxwds-1 further words, so x[0..maxwds-1] are
all valid. Bcopy relies on sign, wds and x being laid out consecutively, so
the field order must not be changed.
*/
struct
Bigint {
struct Bigint *next;
int k, maxwds, sign, wds;
ULong x[1];
};
typedef struct Bigint Bigint;
#ifndef Py_USING_MEMORY_DEBUGGER
/* Memory management: memory is allocated from, and returned to, Kmax+1 pools
of memory, where pool k (0 <= k <= Kmax) is for Bigints b with b->maxwds ==
1 << k. These pools are maintained as linked lists, with freelist[k]
pointing to the head of the list for pool k.
On allocation, if there's no free slot in the appropriate pool, MALLOC is
called to get more memory. This memory is not returned to the system until
Python quits. There's also a private memory pool that's allocated from
in preference to using MALLOC.
For Bigints with more than (1 << Kmax) digits (which implies at least 1233
decimal digits), memory is directly allocated using MALLOC, and freed using
FREE.
XXX: it would be easy to bypass this memory-management system and
translate each call to Balloc into a call to PyMem_Malloc, and each
Bfree to PyMem_Free. Investigate whether this has any significant
impact on performance. */
/* freelist[k] is the head of a singly linked list (chained through
Bigint.next) of freed Bigints of size class k, for 0 <= k <= Kmax. */
static Bigint *freelist[Kmax+1];
/* Allocate a Bigint with room for up to 1<<k words (pooled version).

   The request is served, in order of preference, from the free list for size
   class k, from the static private_mem arena, or from MALLOC.  Size classes
   above Kmax always go straight to MALLOC.

   The returned Bigint has k and maxwds set and sign == wds == 0; its words
   are uninitialized.  Returns NULL if MALLOC fails. */
static Bigint *
Balloc(int k)
{
    Bigint *big;
    int capacity;
    unsigned int ndoubles;

    /* Fast path: recycle a previously freed Bigint of the same size class.
       Its k and maxwds fields are still valid from its first allocation. */
    if (k <= Kmax && freelist[k] != NULL) {
        big = freelist[k];
        freelist[k] = big->next;
        big->sign = big->wds = 0;
        return big;
    }

    /* Size of the block in doubles, rounded up; the struct already holds
       one word, hence capacity-1 extra words. */
    capacity = 1 << k;
    ndoubles = (sizeof(Bigint) + (capacity-1)*sizeof(ULong) + sizeof(double) - 1)
        /sizeof(double);

    if (k <= Kmax && pmem_next - private_mem + ndoubles <= PRIVATE_mem) {
        /* Still fits in the private arena: bump-allocate from it. */
        big = (Bigint*)pmem_next;
        pmem_next += ndoubles;
    }
    else {
        big = (Bigint*)MALLOC(ndoubles*sizeof(double));
        if (big == NULL)
            return NULL;
    }
    big->k = k;
    big->maxwds = capacity;
    big->sign = big->wds = 0;
    return big;
}
/* Release a Bigint obtained from Balloc (pooled version).  NULL is accepted
   and ignored.  Bigints of size class <= Kmax are pushed onto the matching
   free list for reuse; larger ones are handed back to FREE. */
static void
Bfree(Bigint *v)
{
    if (v == NULL)
        return;
    if (v->k <= Kmax) {
        /* Push onto the head of the free list for this size class. */
        v->next = freelist[v->k];
        freelist[v->k] = v;
        return;
    }
    FREE((void*)v);
}
#else
/* Alternative versions of Balloc and Bfree that use PyMem_Malloc and
PyMem_Free directly in place of the custom memory allocation scheme above.
These are provided for the benefit of memory debugging tools like
Valgrind. */
/* Allocate a Bigint with room for up to 1<<k words, always via MALLOC
   (memory-debugger version: no pooling, no private arena).

   The returned Bigint has k and maxwds set and sign == wds == 0; its words
   are uninitialized.  Returns NULL if MALLOC fails. */
static Bigint *
Balloc(int k)
{
    Bigint *big;
    int capacity = 1 << k;
    /* Block size in doubles, rounded up; the struct already holds one word. */
    unsigned int ndoubles =
        (sizeof(Bigint) + (capacity-1)*sizeof(ULong) + sizeof(double) - 1)
        /sizeof(double);

    big = (Bigint*)MALLOC(ndoubles*sizeof(double));
    if (big == NULL)
        return NULL;
    big->k = k;
    big->maxwds = capacity;
    big->wds = 0;
    big->sign = 0;
    return big;
}
/* Release a Bigint obtained from Balloc (memory-debugger version): the block
   goes straight back to FREE.  NULL is accepted and ignored. */
static void
Bfree(Bigint *v)
{
    if (v == NULL)
        return;
    FREE((void*)v);
}
#endif /* Py_USING_MEMORY_DEBUGGER */
/* Bcopy(x, y): copy the value of Bigint *y into Bigint *x.
   Copies the sign and wds fields plus the first y->wds words of y->x in a
   single memcpy that starts at the sign field; x's next, k and maxwds are
   left untouched.  The caller must ensure x has room for y->wds words.
   Relies on sign, wds and x being consecutive members of struct Bigint.
   Both parameters are parenthesized so that arbitrary pointer expressions
   (e.g. "&local") can be passed; y is evaluated more than once. */
#define Bcopy(x,y) memcpy((char *)&(x)->sign, (char *)&(y)->sign, \
                          (y)->wds*sizeof(Long) + 2*sizeof(int))
/* Multiply a Bigint b by m and add a. Either modifies b in place and returns
a pointer to the modified b, or Bfrees b and returns a pointer to a copy.
On failure, return NULL. In this case, b will have been already freed.
Parameters:
b - the Bigint to update; ownership passes to this function
m - multiplier
a - addend, folded in as the initial carry
Returns the (possibly reallocated) result, or NULL if Balloc failed. */
static Bigint *
multadd(Bigint *b, int m, int a) /* multiply by m and add a */
{
int i, wds;
#ifdef ULLong
ULong *x;
ULLong carry, y;
#else
ULong carry, *x, y;
ULong xi, z;
#endif
Bigint *b1;
wds = b->wds;
x = b->x;
i = 0;
/* Seeding the carry with a adds it into the least significant word. */
carry = a;
/* One pass over the words, least significant first. */
do {
#ifdef ULLong
/* 64-bit path: the product plus carry fits in a ULLong; the low 32 bits
are the new word and the high 32 bits the next carry. */
y = *x * (ULLong)m + carry;
carry = y >> 32;
*x++ = (ULong)(y & FFFFFFFF);
#else
/* 32-bit path: multiply the low and high 16-bit halves separately and
stitch the word back together. */
xi = *x;
y = (xi & 0xffff) * m + carry;
z = (xi >> 16) * m + (y >> 16);
carry = z >> 16;
*x++ = (z << 16) + (y & 0xffff);
#endif
}
while(++i < wds);
if (carry) {
/* The result needs one more word.  If b is already full, move it to a
Bigint of the next size class first. */
if (wds >= b->maxwds) {
b1 = Balloc(b->k+1);
if (b1 == NULL){
Bfree(b);
return NULL;
}
Bcopy(b1, b);
Bfree(b);
b = b1;
}
b->x[wds++] = (ULong)carry;
b->wds = wds;
}
return b;
}
/* Convert the decimal digit string s to a Bigint.

   s holds nd significant digits; a decimal separator may sit after the first
   nd0 digits and is skipped.  This continues the work begun by the parser in
   _Py_dg_strtod: y9 must already hold the value of the first 9 digits (or of
   all of them when nd <= 9), so only digits from the tenth onwards are read
   from s.

   Returns the new Bigint, or NULL on allocation failure. */
static Bigint *
s2b(const char *s, int nd0, int nd, ULong y9)
{
    Bigint *acc;
    Long words_needed, capacity;
    int pos, pool;

    /* Pick the smallest size class that offers one word per 9 digits. */
    words_needed = (nd + 8) / 9;
    pool = 0;
    capacity = 1;
    while (words_needed > capacity) {
        capacity <<= 1;
        pool++;
    }

    acc = Balloc(pool);
    if (acc == NULL)
        return NULL;
    acc->x[0] = y9;
    acc->wds = 1;

    /* Fold in the remaining digits.  Digit number pos lives at s[pos] while
       we are still before the separator and at s[pos+1] once past it. */
    for (pos = 9; pos < nd; pos++) {
        const char *digit = s + (pos < nd0 ? pos : pos + 1);
        acc = multadd(acc, 10, *digit - '0');
        if (acc == NULL)
            return NULL;
    }
    return acc;
}
/* Return the number of leading zero bits in the 32-bit integer x.
   Returns 32 when x is zero. */
static int
hi0bits(ULong x)
{
    int zeros = 0;
    int width;

    /* Binary search: look at the top 16, 8, 4 and then 2 bits; whenever the
       inspected group is empty, count it and shift the rest up into place. */
    for (width = 16; width >= 2; width >>= 1) {
        if ((x >> (32 - width)) == 0) {
            zeros += width;
            x <<= width;
        }
    }
    if (x >> 31)
        return zeros;
    /* Top bit is clear: either there is exactly one more leading zero, or
       the input was zero all along. */
    return (x >> 30) ? zeros + 1 : 32;
}
/* Count the trailing zero bits of the 32-bit integer *y and shift *y right
   by that many bits.  Returns the count.  If *y is zero, returns 32 and
   leaves *y unchanged. */
static int
lo0bits(ULong *y)
{
    ULong v = *y;
    int zeros = 0;
    int width;

    /* Quick exits for at most two trailing zeros. */
    if (v & 1)
        return 0;
    if (v & 2) {
        *y = v >> 1;
        return 1;
    }
    if (v & 4) {
        *y = v >> 2;
        return 2;
    }

    /* Binary search over the low 16, 8, 4 and then 2 bits. */
    for (width = 16; width >= 2; width >>= 1) {
        if ((v & (((ULong)1 << width) - 1)) == 0) {
            zeros += width;
            v >>= width;
        }
    }
    if (!(v & 1)) {
        zeros++;
        v >>= 1;
        if (v == 0)
            return 32;      /* input was zero; *y deliberately untouched */
    }
    *y = v;
    return zeros;
}
/* Build a one-word Bigint holding the small nonnegative integer i.
   Returns NULL if the allocation fails. */
static Bigint *
i2b(int i)
{
    Bigint *result = Balloc(1);

    if (result != NULL) {
        result->x[0] = i;
        result->wds = 1;
    }
    return result;
}
/* multiply two Bigints. Returns a new Bigint, or NULL on failure. Ignores
the signs of a and b.  Neither input is modified or freed; the caller owns
the result. */
static Bigint *
mult(Bigint *a, Bigint *b)
{
Bigint *c;
int k, wa, wb, wc;
ULong *x, *xa, *xae, *xb, *xbe, *xc, *xc0;
ULong y;
#ifdef ULLong
ULLong carry, z;
#else
ULong carry, z;
ULong z2;
#endif
/* If either operand is zero the product is a fresh one-word zero. */
if ((!a->x[0] && a->wds == 1) || (!b->x[0] && b->wds == 1)) {
c = Balloc(0);
if (c == NULL)
return NULL;
c->wds = 1;
c->x[0] = 0;
return c;
}
/* Arrange for a to be the operand with more words. */
if (a->wds < b->wds) {
c = a;
a = b;
b = c;
}
/* The product has at most wa + wb words; move up one size class from a's
if that does not fit in a's capacity. */
k = a->k;
wa = a->wds;
wb = b->wds;
wc = wa + wb;
if (wc > a->maxwds)
k++;
c = Balloc(k);
if (c == NULL)
return NULL;
/* Clear the words of the result that the accumulation below will use. */
for(x = c->x, xa = x + wc; x < xa; x++)
*x = 0;
xa = a->x;
xae = xa + wa;
xb = b->x;
xbe = xb + wb;
xc0 = c->x;
#ifdef ULLong
/* 64-bit path: schoolbook multiplication.  For every nonzero word y of b,
add a * y into c starting at the matching word offset xc0. */
for(; xb < xbe; xc0++) {
if ((y = *xb++)) {
x = xa;
xc = xc0;
carry = 0;
do {
z = *x++ * (ULLong)y + *xc + carry;
carry = z >> 32;
*xc++ = (ULong)(z & FFFFFFFF);
}
while(x < xae);
*xc = (ULong)carry;
}
}
#else
/* 32-bit path: the same accumulation, done separately for the low and the
high 16-bit half of each word of b. */
for(; xb < xbe; xb++, xc0++) {
if (y = *xb & 0xffff) {
x = xa;
xc = xc0;
carry = 0;
do {
z = (*x & 0xffff) * y + (*xc & 0xffff) + carry;
carry = z >> 16;
z2 = (*x++ >> 16) * y + (*xc >> 16) + carry;
carry = z2 >> 16;
Storeinc(xc, z2, z);
}
while(x < xae);
*xc = carry;
}
if (y = *xb >> 16) {
x = xa;
xc = xc0;
carry = 0;
z2 = *xc;
do {
z = (*x & 0xffff) * y + (*xc >> 16) + carry;
carry = z >> 16;
Storeinc(xc, z, z2);
z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry;
carry = z2 >> 16;
}
while(x < xae);
*xc = z2;
}
}
#endif
/* Normalize: drop zero words from the most significant end. */
for(xc0 = c->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc) ;
c->wds = wc;
return c;
}
#ifndef Py_USING_MEMORY_DEBUGGER
/* p5s heads a lazily built linked list (chained through Bigint.next) of the
   powers 5**(2**i) for i >= 2, i.e. 625, 625**2, ...  Entries are created on
   demand by pow5mult and are never freed. */
static Bigint *p5s;

/* Multiply the Bigint b by 5**k and return the result, or NULL on failure.
   Ownership of b passes to this function: whenever the returned pointer
   differs from b (including the NULL case) b has already been Bfree'd.
   The sign of b is ignored. */
static Bigint *
pow5mult(Bigint *b, int k)
{
    static int small_pow5[3] = { 5, 25, 125 };
    Bigint *power, *next_power, *product;
    int low = k & 3;

    /* The two low bits of k are handled with a single small multiply. */
    if (low != 0) {
        b = multadd(b, small_pow5[low-1], 0);
        if (b == NULL)
            return NULL;
    }
    k >>= 2;
    if (k == 0)
        return b;

    /* First call ever: seed the cache with 5**4. */
    if (p5s == NULL) {
        power = i2b(625);
        if (power == NULL) {
            Bfree(b);
            return NULL;
        }
        power->next = 0;
        p5s = power;
    }

    /* Square-and-multiply over the remaining bits of k, walking (and
       extending where necessary) the cached list of repeated squares. */
    for (power = p5s; ; power = next_power) {
        if (k & 1) {
            product = mult(b, power);
            Bfree(b);
            b = product;
            if (b == NULL)
                return NULL;
        }
        k >>= 1;
        if (k == 0)
            break;
        next_power = power->next;
        if (next_power == NULL) {
            next_power = mult(power, power);
            if (next_power == NULL) {
                Bfree(b);
                return NULL;
            }
            next_power->next = 0;
            power->next = next_power;
        }
    }
    return b;
}
#else
/* Multiply the Bigint b by 5**k without keeping any powers of 5 alive
   between calls (variant for memory-debugging tools such as Valgrind).

   Returns the result, or NULL on failure.  Ownership of b passes to this
   function: whenever the returned pointer differs from b (including the NULL
   case) b has already been Bfree'd.  Every temporary power of 5 is released
   before returning. */
static Bigint *
pow5mult(Bigint *b, int k)
{
    static int small_pow5[3] = { 5, 25, 125 };
    Bigint *power, *squared, *product;
    int low = k & 3;

    /* The two low bits of k are handled with a single small multiply. */
    if (low != 0) {
        b = multadd(b, small_pow5[low-1], 0);
        if (b == NULL)
            return NULL;
    }
    k >>= 2;
    if (k == 0)
        return b;

    power = i2b(625);           /* 5**4 */
    if (power == NULL) {
        Bfree(b);
        return NULL;
    }

    /* Square-and-multiply over the remaining bits of k. */
    while (1) {
        if (k & 1) {
            product = mult(b, power);
            Bfree(b);
            b = product;
            if (b == NULL) {
                Bfree(power);
                return NULL;
            }
        }
        k >>= 1;
        if (k == 0)
            break;
        squared = mult(power, power);
        Bfree(power);
        power = squared;
        if (power == NULL) {
            Bfree(b);
            return NULL;
        }
    }
    Bfree(power);
    return b;
}
#endif /* Py_USING_MEMORY_DEBUGGER */
/* shift a Bigint b left by k bits. Return a pointer to the shifted result,
or NULL on failure. If the returned pointer is distinct from b then the
original b will have been Bfree'd. Ignores the sign of b.
When k is zero or b is zero, b itself is returned unchanged; in every other
case a new Bigint is allocated for the result. */
static Bigint *
lshift(Bigint *b, int k)
{
int i, k1, n, n1;
Bigint *b1;
ULong *x, *x1, *xe, z;
if (!k || (!b->x[0] && b->wds == 1))
return b;
/* n: number of whole 32-bit words to shift by. */
n = k >> 5;
/* n1: upper bound on the result's word count (one spare word for bits
that spill out of the top word).  Pick the size class that holds it. */
k1 = b->k;
n1 = n + b->wds + 1;
for(i = b->maxwds; n1 > i; i <<= 1)
k1++;
b1 = Balloc(k1);
if (b1 == NULL) {
Bfree(b);
return NULL;
}
/* The low n words of the result are zero. */
x1 = b1->x;
for(i = 0; i < n; i++)
*x1++ = 0;
x = b->x;
xe = x + b->wds;
/* k now becomes the remaining bit shift within a word (0..31). */
if (k &= 0x1f) {
k1 = 32 - k;
z = 0;
/* z carries the bits shifted out of each word into the next one. */
do {
*x1++ = *x << k | z;
z = *x++ >> k1;
}
while(x < xe);
/* Keep the spare top word only if something spilled into it. */
if ((*x1 = z))
++n1;
}
/* Shift is a whole number of words: plain copy. */
else do
*x1++ = *x++;
while(x < xe);
b1->wds = n1 - 1;
Bfree(b);
return b1;
}
/* Do a three-way compare of a and b, returning -1 if a < b, 0 if a == b and
   1 if a > b.  Ignores signs of a and b.

   Both operands must be normalized (no zero word at the most significant
   end unless wds == 1); DEBUG builds verify this.  For normalized operands
   the one with more words is the larger, so word counts are compared first
   and the words themselves only when the counts agree.

   The result is always exactly -1, 0 or 1, as documented (earlier versions
   returned the raw difference of the word counts when they differed). */
static int
cmp(Bigint *a, Bigint *b)
{
    ULong *xa, *xa0, *xb;
    int wa = a->wds;
    int wb = b->wds;

#ifdef DEBUG
    if (wa > 1 && !a->x[wa-1])
        Bug("cmp called with a->x[a->wds-1] == 0");
    if (wb > 1 && !b->x[wb-1])
        Bug("cmp called with b->x[b->wds-1] == 0");
#endif
    if (wa != wb)
        return wa < wb ? -1 : 1;

    /* Same length: scan from the most significant word downwards and stop
       at the first difference. */
    xa0 = a->x;
    xa = xa0 + wb;
    xb = b->x + wb;
    while (xa > xa0) {
        if (*--xa != *--xb)
            return *xa < *xb ? -1 : 1;
    }
    return 0;
}
/* Take the difference of Bigints a and b, returning a new Bigint. Returns
NULL on failure. The signs of a and b are ignored, but the sign of the
result is set appropriately: the result holds |a - b| with sign 1 when
a < b and sign 0 otherwise.  Neither input is modified or freed. */
static Bigint *
diff(Bigint *a, Bigint *b)
{
Bigint *c;
int i, wa, wb;
ULong *xa, *xae, *xb, *xbe, *xc;
#ifdef ULLong
ULLong borrow, y;
#else
ULong borrow, y;
ULong z;
#endif
i = cmp(a,b);
/* Equal magnitudes: the answer is a fresh one-word zero. */
if (!i) {
c = Balloc(0);
if (c == NULL)
return NULL;
c->wds = 1;
c->x[0] = 0;
return c;
}
/* Make a the larger operand and remember in i whether a swap was needed;
that becomes the sign of the result. */
if (i < 0) {
c = a;
a = b;
b = c;
i = 1;
}
else
i = 0;
/* The result cannot be longer than a, so use a's size class. */
c = Balloc(a->k);
if (c == NULL)
return NULL;
c->sign = i;
wa = a->wds;
xa = a->x;
xae = xa + wa;
wb = b->wds;
xb = b->x;
xbe = xb + wb;
xc = c->x;
borrow = 0;
#ifdef ULLong
/* 64-bit path: subtract word by word over the length of b; bit 32 of the
intermediate is the borrow into the next word. */
do {
y = (ULLong)*xa++ - *xb++ - borrow;
borrow = y >> 32 & (ULong)1;
*xc++ = (ULong)(y & FFFFFFFF);
}
while(xb < xbe);
/* Propagate the borrow through the remaining words of a. */
while(xa < xae) {
y = *xa++ - borrow;
borrow = y >> 32 & (ULong)1;
*xc++ = (ULong)(y & FFFFFFFF);
}
#else
/* 32-bit path: the same subtraction on 16-bit halves; bit 16 of each
intermediate is the borrow. */
do {
y = (*xa & 0xffff) - (*xb & 0xffff) - borrow;
borrow = (y & 0x10000) >> 16;
z = (*xa++ >> 16) - (*xb++ >> 16) - borrow;
borrow = (z & 0x10000) >> 16;
Storeinc(xc, z, y);
}
while(xb < xbe);
while(xa < xae) {
y = (*xa & 0xffff) - borrow;
borrow = (y & 0x10000) >> 16;
z = (*xa++ >> 16) - borrow;
borrow = (z & 0x10000) >> 16;
Storeinc(xc, z, y);
}
#endif
/* Normalize: drop zero words from the top.  The loop needs no lower bound
because the result is known to be nonzero here (a != b). */
while(!*--xc)
wa--;
c->wds = wa;
return c;
}
/* Return the gap between the positive normal double *x and the next larger
   double.  The result is built directly from x's exponent field (lowered by
   P-1, with a zero fraction), so it is not correct for subnormal inputs. */
static double
ulp(U *x)
{
    U result;
    Long hi = (word0(x) & Exp_mask) - (P-1)*Exp_msk1;

    word0(&result) = hi;
    word1(&result) = 0;
    return dval(&result);
}
/* Convert a Bigint to a double plus an exponent.
The returned double always has its exponent field set to Exp_1 (so it lies
in [1.0, 2.0)); its fraction is filled with the leading bits of a, taken
from up to three of a's most significant words.  *e receives the number of
significant bits in a's most significant word (32 minus its leading zeros);
callers combine it with a->wds to recover the true magnitude (see ratio).
a must be normalized and nonzero (checked in DEBUG builds). */
static double
b2d(Bigint *a, int *e)
{
ULong *xa, *xa0, w, y, z;
int k;
U d;
xa0 = a->x;
xa = xa0 + a->wds;
/* y: most significant word of a. */
y = *--xa;
#ifdef DEBUG
if (!y) Bug("zero y in b2d");
#endif
k = hi0bits(y);
*e = 32 - k;
/* Case 1: y has more than 32-Ebits significant bits, so it fills word0's
fraction bits by itself and spills into word1; one more word (w) tops up
word1. */
if (k < Ebits) {
word0(&d) = Exp_1 | y >> (Ebits - k);
w = xa > xa0 ? *--xa : 0;
word1(&d) = y << ((32-Ebits) + k) | w >> (Ebits - k);
goto ret_d;
}
/* Case 2: y alone is too short; pull in the next word z (0 if absent). */
z = xa > xa0 ? *--xa : 0;
if (k -= Ebits) {
/* y must be shifted left by k; z, and a third word if there is one, supply
the bits shifted in from below. */
word0(&d) = Exp_1 | y << k | z >> (32 - k);
y = xa > xa0 ? *--xa : 0;
word1(&d) = z << k | y >> (32 - k);
}
else {
/* y already lines up with word0's fraction bits exactly. */
word0(&d) = Exp_1 | y;
word1(&d) = z;
}
ret_d:
return dval(&d);
}
/* Convert a scaled double to a Bigint plus an exponent. Similar to d2b,
except that it accepts the scale parameter used in _Py_dg_strtod (which
should be either 0 or 2*P), and the normalization for the return value is
different (see below). On input, d should be finite and nonnegative, and d
/ 2**scale should be exactly representable as an IEEE 754 double.
Returns a Bigint b and an integer e such that
dval(d) / 2**scale = b * 2**e.
Unlike d2b, b is not necessarily odd: b and e are normalized so
that either 2**(P-1) <= b < 2**P and e >= Etiny, or b < 2**P
and e == Etiny. This applies equally to an input of 0.0: in that
case the return values are b = 0 and e = Etiny.
The above normalization ensures that for all possible inputs d,
2**e gives ulp(d/2**scale).
Parameters:
d - the (possibly scaled) double; not modified
scale - the scaling exponent described above
e - output: receives the binary exponent
Returns NULL on failure (allocation error); *e is not written in that case.
*/
static Bigint *
sd2b(U *d, int scale, int *e)
{
Bigint *b;
b = Balloc(1);
if (b == NULL)
return NULL;
/* First construct b and e assuming that scale == 0. */
b->wds = 2;
b->x[0] = word1(d);
b->x[1] = word0(d) & Frac_mask;
*e = Etiny - 1 + (int)((word0(d) & Exp_mask) >> Exp_shift);
/* A zero exponent field (subnormal or zero) gives Etiny - 1 here: clamp it
to Etiny.  Otherwise the number is normal, so restore the implicit leading
1 bit of the significand. */
if (*e < Etiny)
*e = Etiny;
else
b->x[1] |= Exp_msk1;
/* Now adjust for scale, provided that b != 0. */
if (scale && (b->x[0] || b->x[1])) {
*e -= scale;
if (*e < Etiny) {
/* The unscaled value is subnormal: hold e at Etiny and shift b right by
the shortfall instead.  From here on, scale is reused as that shift. */
scale = Etiny - *e;
*e = Etiny;
/* We can't shift more than P-1 bits without shifting out a 1. */
assert(0 < scale && scale <= P - 1);
if (scale >= 32) {
/* The bits shifted out should all be zero. */
assert(b->x[0] == 0);
b->x[0] = b->x[1];
b->x[1] = 0;
scale -= 32;
}
if (scale) {
/* The bits shifted out should all be zero. */
assert(b->x[0] << (32 - scale) == 0);
b->x[0] = (b->x[0] >> scale) | (b->x[1] << (32 - scale));
b->x[1] >>= scale;
}
}
}
/* Ensure b is normalized. */
if (!b->x[1])
b->wds = 1;
return b;
}
/* Convert a double to a Bigint plus an exponent. Return NULL on failure.
Given a finite nonzero double d, return an odd Bigint b and exponent *e
such that fabs(d) = b * 2**e. On return, *bits gives the number of
significant bits of b; that is, 2**(*bits-1) <= b < 2**(*bits).
If d is zero, then b == 0, *e == -1010, *bits = 0.
Side effect: the sign bit of *d is cleared in place. */
static Bigint *
d2b(U *d, int *e, int *bits)
{
Bigint *b;
int de, k;
ULong *x, y, z;
int i;
b = Balloc(1);
if (b == NULL)
return NULL;
x = b->x;
/* z: the fraction bits held in word0. */
z = word0(d) & Frac_mask;
word0(d) &= 0x7fffffff; /* clear sign bit, which we ignore */
/* de: biased exponent; nonzero means a normal number, so restore the
implicit leading 1 bit. */
if ((de = (int)(word0(d) >> Exp_shift)))
z |= Exp_msk1;
/* Strip trailing zero bits so that b is odd; k counts the bits removed. */
if ((y = word1(d))) {
/* Low word is nonzero: shift the 64-bit pair (z:y) right by k. */
if ((k = lo0bits(&y))) {
x[0] = y | z << (32 - k);
z >>= k;
}
else
x[0] = y;
/* i: word count of b (2 if anything is left in z, otherwise 1). */
i =
b->wds = (x[1] = z) ? 2 : 1;
}
else {
/* Low word is entirely zero: only z matters, and 32 more bits are
accounted for in k. */
k = lo0bits(&z);
x[0] = z;
i =
b->wds = 1;
k += 32;
}
if (de) {
/* Normal number: P significant bits before stripping. */
*e = de - Bias - (P-1) + k;
*bits = P - k;
}
else {
/* Subnormal (or zero): exponent is that of the smallest normal, and the
bit count comes from the actual top word of b. */
*e = de - Bias - (P-1) + 1 + k;
*bits = 32*i - hi0bits(x[i-1]);
}
return b;
}
/* Return an approximation to a / b as a double; the result may be off by up
   to 2.5 ulps.  Both Bigints are first reduced to doubles in [1, 2) with
   b2d, and the difference in magnitude is then applied straight to the
   exponent field of the numerator or the denominator. */
static double
ratio(Bigint *a, Bigint *b)
{
    U num, den;
    int bits_a, bits_b, shift;

    dval(&num) = b2d(a, &bits_a);
    dval(&den) = b2d(b, &bits_b);

    /* Net power of two by which a exceeds b in magnitude. */
    shift = bits_a - bits_b + 32*(a->wds - b->wds);
    if (shift > 0)
        word0(&num) += shift*Exp_msk1;
    else {
        shift = -shift;
        word0(&den) += shift*Exp_msk1;
    }
    return dval(&num) / dval(&den);
}
/* tens[k] == 10**k for 0 <= k <= 22 (k <= Ten_pmax). */
static const double
tens[] = {
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
1e20, 1e21, 1e22
};
/* bigtens[i] == 10**(16 * 2**i). */
static const double
bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
/* tinytens[i] == 10**-(16 * 2**i), except for the last entry, which carries
an extra factor of 2**106. */
static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128,
9007199254740992.*9007199254740992.e-256
/* = 2^106 * 1e-256 */
};
/* The extra factor of 2^106 (that is, 2^53 squared) in tinytens[4] helps */
/* us avoid setting the underflow flag unnecessarily. It leads to a song */
/* and dance at the end of strtod. */
#define Scale_Bit 0x10
/* Number of entries in bigtens[] (and in tinytens[]). */
#define n_bigtens 5
/* Bits per Bigint word, its log2, and the matching bit-index mask. */
#define ULbits 32
#define kshift 5
#define kmask 31
/* Return the left-shift count, reduced modulo 32, that leaves exactly four
   leading zero bits in the top word of b once b has additionally been
   shifted left by p2 bits (a p2 <= 0 counts as no extra shift).  bigcomp
   uses it to put the divisor into the form quorem expects. */
static int
dshift(Bigint *b, int p2)
{
    int shift = hi0bits(b->x[b->wds-1]) - 4 - (p2 > 0 ? p2 : 0);

    return shift & kmask;
}
/* special case of Bigint division. The quotient is always in the range 0 <=
quotient < 10, and on entry the divisor S is normalized so that its top 4
bits (28--31) are zero and bit 27 is set.
Computes q = b / S (integer quotient), replaces b in place by the remainder
b - q*S, and returns q.  S is not modified.  If b has fewer words than S
the quotient is 0 and b is left untouched. */
static int
quorem(Bigint *b, Bigint *S)
{
int n;
ULong *bx, *bxe, q, *sx, *sxe;
#ifdef ULLong
ULLong borrow, carry, y, ys;
#else
ULong borrow, carry, y, ys;
ULong si, z, zs;
#endif
n = S->wds;
#ifdef DEBUG
/*debug*/ if (b->wds > n)
/*debug*/ Bug("oversize b in quorem");
#endif
if (b->wds < n)
return 0;
/* From here on n is the index of the top word; sxe and bxe point at the
top words of S and b. */
sx = S->x;
sxe = sx + --n;
bx = b->x;
bxe = bx + n;
/* Estimate the quotient from the top words only.  Dividing by one more
than S's top word can only underestimate, which the correction step below
repairs. */
q = *bxe / (*sxe + 1); /* ensure q <= true quotient */
#ifdef DEBUG
/*debug*/ if (q > 9)
/*debug*/ Bug("oversized quotient in quorem");
#endif
if (q) {
/* b -= q * S, word by word: carry belongs to the multiplication, borrow
to the subtraction. */
borrow = 0;
carry = 0;
do {
#ifdef ULLong
ys = *sx++ * (ULLong)q + carry;
carry = ys >> 32;
y = *bx - (ys & FFFFFFFF) - borrow;
borrow = y >> 32 & (ULong)1;
*bx++ = (ULong)(y & FFFFFFFF);
#else
si = *sx++;
ys = (si & 0xffff) * q + carry;
zs = (si >> 16) * q + (ys >> 16);
carry = zs >> 16;
y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
borrow = (y & 0x10000) >> 16;
z = (*bx >> 16) - (zs & 0xffff) - borrow;
borrow = (z & 0x10000) >> 16;
Storeinc(bx, z, y);
#endif
}
while(sx <= sxe);
/* Renormalize b if its top word became zero. */
if (!*bxe) {
bx = b->x;
while(--bxe > bx && !*bxe)
--n;
b->wds = n;
}
}
/* Correction step: if the remainder is still at least S, the estimate was
one too small, so bump q and subtract S once more. */
if (cmp(b, S) >= 0) {
q++;
borrow = 0;
carry = 0;
bx = b->x;
sx = S->x;
do {
#ifdef ULLong
ys = *sx++ + carry;
carry = ys >> 32;
y = *bx - (ys & FFFFFFFF) - borrow;
borrow = y >> 32 & (ULong)1;
*bx++ = (ULong)(y & FFFFFFFF);
#else
si = *sx++;
ys = (si & 0xffff) + carry;
zs = (si >> 16) + (ys >> 16);
carry = zs >> 16;
y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
borrow = (y & 0x10000) >> 16;
z = (*bx >> 16) - (zs & 0xffff) - borrow;
borrow = (z & 0x10000) >> 16;
Storeinc(bx, z, y);
#endif
}
while(sx <= sxe);
/* Renormalize b again. */
bx = b->x;
bxe = bx + n;
if (!*bxe) {
while(--bxe > bx && !*bxe)
--n;
b->wds = n;
}
}
return q;
}
/* Scale-aware variant of ulp().

   Assuming that x is finite and nonnegative (positive zero is fine) and that
   x / 2^bc->scale is exactly representable as a double, the result equals
   2^bc->scale * ulp(x / 2^bc->scale).  When scaling is active and the
   unscaled value would be subnormal, a fixed value is returned whose
   exponent field is P+2 and whose fraction is zero. */
static double
sulp(U *x, BCinfo *bc)
{
    U u;

    if (!bc->scale || 2*P + 1 <= (int)((word0(x) & Exp_mask) >> Exp_shift)) {
        /* Either no scaling is in effect or x / 2^bc->scale is normal:
           the ordinary ulp applies. */
        assert(word0(x) || word1(x)); /* x != 0.0 */
        return ulp(x);
    }

    /* rv/2^bc->scale is subnormal */
    word0(&u) = (P+2)*Exp_msk1;
    word1(&u) = 0;
    return u.d;
}
/* The bigcomp function handles some hard cases for strtod, for inputs
with more than STRTOD_DIGLIM digits. It's called once an initial
estimate for the double corresponding to the input string has
already been obtained by the code in _Py_dg_strtod.
The bigcomp function is only called after _Py_dg_strtod has found a
double value rv such that either rv or rv + 1ulp represents the
correctly rounded value corresponding to the original string. It
determines which of these two values is the correct one by
computing the decimal digits of rv + 0.5ulp and comparing them with
the corresponding digits of s0.
In the following, write dv for the absolute value of the number represented
by the input string.
Inputs:
s0 points to the first significant digit of the input string.
rv is a (possibly scaled) estimate for the closest double value to the
value represented by the original input to _Py_dg_strtod. If
bc->scale is nonzero, then rv/2^(bc->scale) is the approximation to
the input value.
bc is a struct containing information gathered during the parsing and
estimation steps of _Py_dg_strtod. Description of fields follows:
bc->e0 gives the exponent of the input value, such that dv = (integer
given by the bc->nd digits of s0) * 10**e0
bc->nd gives the total number of significant digits of s0. It will
be at least 1.
bc->nd0 gives the number of significant digits of s0 before the
decimal separator. If there's no decimal separator, bc->nd0 ==
bc->nd.
bc->scale is the value used to scale rv to avoid doing arithmetic with
subnormal values. It's either 0 or 2*P (=106).
Outputs:
On successful exit, rv/2^(bc->scale) is the closest double to dv.
Returns 0 on success, -1 on failure (e.g., due to a failed malloc call).
On failure every Bigint allocated so far has been released. */
static int
bigcomp(U *rv, const char *s0, BCinfo *bc)
{
Bigint *b, *d;
int b2, d2, dd, i, nd, nd0, odd, p2, p5;
nd = bc->nd;
nd0 = bc->nd0;
/* p5: decimal exponent of the input when its digits are read as a fraction
in [0.1, 1), i.e. dv = 0.d1d2... * 10**p5. */
p5 = nd + bc->e0;
b = sd2b(rv, bc->scale, &p2);
if (b == NULL)
return -1;
/* record whether the lsb of rv/2^(bc->scale) is odd: in the exact halfway
case, this is used for round to even. */
odd = b->x[0] & 1;
/* left shift b by 1 bit and or a 1 into the least significant bit;
this gives us b * 2**p2 = rv/2^(bc->scale) + 0.5 ulp. */
b = lshift(b, 1);
if (b == NULL)
return -1;
b->x[0] |= 1;
p2--;
/* Split 10**p5 into 2**p5 * 5**p5: the power of 2 is folded into p2 here,
the power of 5 is applied to d or b below. */
p2 -= p5;
d = i2b(1);
if (d == NULL) {
Bfree(b);
return -1;
}
/* Arrange for convenient computation of quotients:
* shift left if necessary so divisor has 4 leading 0 bits.
*/
/* Put the power of 5 on whichever side keeps the exponent positive. */
if (p5 > 0) {
d = pow5mult(d, p5);
if (d == NULL) {
Bfree(b);
return -1;
}
}
else if (p5 < 0) {
b = pow5mult(b, -p5);
if (b == NULL) {
Bfree(d);
return -1;
}
}
/* Likewise for the power of 2: b2 and d2 are the left shifts still owed
to b and d respectively. */
if (p2 > 0) {
b2 = p2;
d2 = 0;
}
else {
b2 = 0;
d2 = -p2;
}
/* Extra common shift so that d ends up normalized for quorem. */
i = dshift(d, d2);
if ((b2 += i) > 0) {
b = lshift(b, b2);
if (b == NULL) {
Bfree(d);
return -1;
}
}
if ((d2 += i) > 0) {
d = lshift(d, d2);
if (d == NULL) {
Bfree(b);
return -1;
}
}
/* Compare s0 with b/d: set dd to -1, 0, or 1 according as s0 < b/d, s0 ==
* b/d, or s0 > b/d. Here the digits of s0 are thought of as representing
* a number in the range [0.1, 1). */
if (cmp(b, d) >= 0)
/* b/d >= 1 */
dd = -1;
else {
i = 0;
for(;;) {
/* Generate the next decimal digit of b/d (multiply by 10, take the
quotient) and compare it with the next input digit, skipping over the
decimal separator once i reaches nd0. */
b = multadd(b, 10, 0);
if (b == NULL) {
Bfree(d);
return -1;
}
dd = s0[i < nd0 ? i : i+1] - '0' - quorem(b, d);
i++;
if (dd)
break;
if (!b->x[0] && b->wds == 1) {
/* b/d == 0 */
dd = i < nd;
break;
}
if (!(i < nd)) {
/* b/d != 0, but digits of s0 exhausted */
dd = -1;
break;
}
}
}
Bfree(b);
Bfree(d);
/* Round up when the input lies above the halfway point, or exactly on it
with an odd rv (round-half-even). */
if (dd > 0 || (dd == 0 && odd))
dval(rv) += sulp(rv, bc);
return 0;
}
/* _Py_dg_strtod: convert the decimal string starting at s00 to a double.
 *
 *   s00 - input text: optional sign, decimal digits with an optional '.',
 *         and an optional exponent introduced by 'e' or 'E'.
 *   se  - if non-NULL, *se receives the position at which parsing stopped;
 *         on a parse error it is set to the start of the input.
 *
 * Return value and errno:
 *   - parse error (no digits at all, or more than MAX_DIGITS digits):
 *     returns 0.0.
 *   - a Bigint allocation fails: returns -1.0 with errno = ENOMEM.
 *   - underflow: returns a zero carrying the sign of the input.
 *   - overflow: sets errno = ERANGE and returns the value whose high word
 *     is Exp_mask and low word is 0, with the sign of the input.
 *   - otherwise: the converted value.
 *
 * Every exit taken from inside the main correction loop releases the
 * Bigints bb, bd, bs, bd0 and delta first.  The underflow exits from the
 * loop go through the undfl_cleanup label for that purpose; jumping
 * straight to undfl from there would skip the Bfree calls and leak them.
 */
double
_Py_dg_strtod(const char *s00, char **se)
{
    int bb2, bb5, bbe, bd2, bd5, bs2, c, dsign, e, e1, error;
    int esign, i, j, k, lz, nd, nd0, odd, sign;
    const char *s, *s0, *s1;
    double aadj, aadj1;
    U aadj2, adj, rv, rv0;
    ULong y, z, abs_exp;
    Long L;
    BCinfo bc;
    Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
    size_t ndigits, fraclen;

    dval(&rv) = 0.;

    /* Start parsing. */
    c = *(s = s00);

    /* Parse optional sign, if present. */
    sign = 0;
    switch (c) {
    case '-':
        sign = 1;
        /* no break */
    case '+':
        c = *++s;
    }

    /* Skip leading zeros: lz is true iff there were leading zeros. */
    s1 = s;
    while (c == '0')
        c = *++s;
    lz = s != s1;

    /* Point s0 at the first nonzero digit (if any).  fraclen will be the
       number of digits between the decimal point and the end of the
       digit string.  ndigits will be the total number of digits ignoring
       leading zeros. */
    s0 = s1 = s;
    while ('0' <= c && c <= '9')
        c = *++s;
    ndigits = s - s1;
    fraclen = 0;

    /* Parse decimal point and following digits. */
    if (c == '.') {
        c = *++s;
        if (!ndigits) {
            s1 = s;
            while (c == '0')
                c = *++s;
            lz = lz || s != s1;
            fraclen += (s - s1);
            s0 = s;
        }
        s1 = s;
        while ('0' <= c && c <= '9')
            c = *++s;
        ndigits += s - s1;
        fraclen += s - s1;
    }

    /* Now lz is true if and only if there were leading zero digits, and
       ndigits gives the total number of digits ignoring leading zeros.  A
       valid input must have at least one digit. */
    if (!ndigits && !lz) {
        if (se)
            *se = (char *)s00;
        goto parse_error;
    }

    /* Range check ndigits and fraclen to make sure that they, and values
       computed with them, can safely fit in an int. */
    if (ndigits > MAX_DIGITS || fraclen > MAX_DIGITS) {
        if (se)
            *se = (char *)s00;
        goto parse_error;
    }
    nd = (int)ndigits;
    nd0 = (int)ndigits - (int)fraclen;

    /* Parse exponent. */
    e = 0;
    if (c == 'e' || c == 'E') {
        /* From here on s00 remembers where the 'e' was, so that an
           exponent without digits can be backed out below. */
        s00 = s;
        c = *++s;

        /* Exponent sign. */
        esign = 0;
        switch (c) {
        case '-':
            esign = 1;
            /* no break */
        case '+':
            c = *++s;
        }

        /* Skip zeros.  lz is true iff there are leading zeros. */
        s1 = s;
        while (c == '0')
            c = *++s;
        lz = s != s1;

        /* Get absolute value of the exponent. */
        s1 = s;
        abs_exp = 0;
        while ('0' <= c && c <= '9') {
            abs_exp = 10*abs_exp + (c - '0');
            c = *++s;
        }

        /* abs_exp will be correct modulo 2**32.  But 10**9 < 2**32, so if
           there are at most 9 significant exponent digits then overflow is
           impossible. */
        if (s - s1 > 9 || abs_exp > MAX_ABS_EXP)
            e = (int)MAX_ABS_EXP;
        else
            e = (int)abs_exp;
        if (esign)
            e = -e;

        /* A valid exponent must have at least one digit. */
        if (s == s1 && !lz)
            s = s00;
    }

    /* Adjust exponent to take into account position of the point. */
    e -= nd - nd0;
    if (nd0 <= 0)
        nd0 = nd;

    /* Finished parsing.  Set se to indicate how far we parsed */
    if (se)
        *se = (char *)s;

    /* If all digits were zero, exit with return value +-0.0.  Otherwise,
       strip trailing zeros: scan back until we hit a nonzero digit. */
    if (!nd)
        goto ret;
    for (i = nd; i > 0; ) {
        --i;
        if (s0[i < nd0 ? i : i+1] != '0') {
            ++i;
            break;
        }
    }
    e += nd - i;
    nd = i;
    if (nd0 > nd)
        nd0 = nd;

    /* Summary of parsing results.  After parsing, and dealing with zero
     * inputs, we have values s0, nd0, nd, e, sign, where:
     *
     *  - s0 points to the first significant digit of the input string
     *
     *  - nd is the total number of significant digits (here, and
     *    below, 'significant digits' means the set of digits of the
     *    significand of the input that remain after ignoring leading
     *    and trailing zeros).
     *
     *  - nd0 indicates the position of the decimal point, if present; it
     *    satisfies 1 <= nd0 <= nd.  The nd significant digits are in
     *    s0[0:nd0] and s0[nd0+1:nd+1] using the usual Python half-open slice
     *    notation.  (If nd0 < nd, then s0[nd0] contains a '.'  character; if
     *    nd0 == nd, then s0[nd0] could be any non-digit character.)
     *
     *  - e is the adjusted exponent: the absolute value of the number
     *    represented by the original input string is n * 10**e, where
     *    n is the integer represented by the concatenation of
     *    s0[0:nd0] and s0[nd0+1:nd+1]
     *
     *  - sign gives the sign of the input:  1 for negative, 0 for positive
     *
     *  - the first and last significant digits are nonzero
     */

    /* put first DBL_DIG+1 digits into integer y and z.
     *
     *  - y contains the value represented by the first min(9, nd)
     *    significant digits
     *
     *  - if nd > 9, z contains the value represented by significant digits
     *    with indices in [9, min(16, nd)).  So y * 10**(min(16, nd) - 9) + z
     *    gives the value represented by the first min(16, nd) sig. digits.
     */
    bc.e0 = e1 = e;
    y = z = 0;
    for (i = 0; i < nd; i++) {
        if (i < 9)
            y = 10*y + s0[i < nd0 ? i : i+1] - '0';
        else if (i < DBL_DIG+1)
            z = 10*z + s0[i < nd0 ? i : i+1] - '0';
        else
            break;
    }

    k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1;
    dval(&rv) = y;
    if (k > 9) {
        dval(&rv) = tens[k - 9] * dval(&rv) + z;
    }
    bd0 = 0;
    if (nd <= DBL_DIG
        && Flt_Rounds == 1
        ) {
        if (!e)
            goto ret;
        if (e > 0) {
            if (e <= Ten_pmax) {
                dval(&rv) *= tens[e];
                goto ret;
            }
            i = DBL_DIG - nd;
            if (e <= Ten_pmax + i) {
                /* A fancier test would sometimes let us do
                 * this for larger i values.
                 */
                e -= i;
                dval(&rv) *= tens[i];
                dval(&rv) *= tens[e];
                goto ret;
            }
        }
        else if (e >= -Ten_pmax) {
            dval(&rv) /= tens[-e];
            goto ret;
        }
    }
    e1 += nd - k;

    bc.scale = 0;

    /* Get starting approximation = rv * 10**e1 */

    if (e1 > 0) {
        if ((i = e1 & 15))
            dval(&rv) *= tens[i];
        if (e1 &= ~15) {
            if (e1 > DBL_MAX_10_EXP)
                goto ovfl;
            e1 >>= 4;
            for(j = 0; e1 > 1; j++, e1 >>= 1)
                if (e1 & 1)
                    dval(&rv) *= bigtens[j];
            /* The last multiplication could overflow. */
            word0(&rv) -= P*Exp_msk1;
            dval(&rv) *= bigtens[j];
            if ((z = word0(&rv) & Exp_mask)
                > Exp_msk1*(DBL_MAX_EXP+Bias-P))
                goto ovfl;
            if (z > Exp_msk1*(DBL_MAX_EXP+Bias-1-P)) {
                /* set to largest number */
                /* (Can't trust DBL_MAX) */
                word0(&rv) = Big0;
                word1(&rv) = Big1;
            }
            else
                word0(&rv) += P*Exp_msk1;
        }
    }
    else if (e1 < 0) {
        /* The input decimal value lies in [10**e1, 10**(e1+16)).

           If e1 <= -512, underflow immediately.
           If e1 <= -256, set bc.scale to 2*P.

           So for input value < 1e-256, bc.scale is always set;
           for input value >= 1e-240, bc.scale is never set.
           For input values in [1e-256, 1e-240), bc.scale may or may
           not be set. */

        e1 = -e1;
        if ((i = e1 & 15))
            dval(&rv) /= tens[i];
        if (e1 >>= 4) {
            /* No Bigint has been allocated yet, so a plain jump to undfl
               is fine here and a few lines below. */
            if (e1 >= 1 << n_bigtens)
                goto undfl;
            if (e1 & Scale_Bit)
                bc.scale = 2*P;
            for(j = 0; e1 > 0; j++, e1 >>= 1)
                if (e1 & 1)
                    dval(&rv) *= tinytens[j];
            if (bc.scale && (j = 2*P + 1 - ((word0(&rv) & Exp_mask)
                                            >> Exp_shift)) > 0) {
                /* scaled rv is denormal; clear j low bits */
                if (j >= 32) {
                    word1(&rv) = 0;
                    if (j >= 53)
                        word0(&rv) = (P+2)*Exp_msk1;
                    else
                        word0(&rv) &= 0xffffffff << (j-32);
                }
                else
                    word1(&rv) &= 0xffffffff << j;
            }
            if (!dval(&rv))
                goto undfl;
        }
    }

    /* Now the hard part -- adjusting rv to the correct value.*/

    /* Put digits into bd: true value = bd * 10^e */

    bc.nd = nd;
    bc.nd0 = nd0;       /* Only needed if nd > STRTOD_DIGLIM, but done here */
                        /* to silence an erroneous warning about bc.nd0 */
                        /* possibly not being initialized. */
    if (nd > STRTOD_DIGLIM) {
        /* ASSERT(STRTOD_DIGLIM >= 18); 18 == one more than the */
        /* minimum number of decimal digits to distinguish double values */
        /* in IEEE arithmetic. */

        /* Truncate input to 18 significant digits, then discard any trailing
           zeros on the result by updating nd, nd0, e and y suitably. (There's
           no need to update z; it's not reused beyond this point.) */
        for (i = 18; i > 0; ) {
            /* scan back until we hit a nonzero digit.  significant digit 'i'
               is s0[i] if i < nd0, s0[i+1] if i >= nd0. */
            --i;
            if (s0[i < nd0 ? i : i+1] != '0') {
                ++i;
                break;
            }
        }
        e += nd - i;
        nd = i;
        if (nd0 > nd)
            nd0 = nd;
        if (nd < 9) { /* must recompute y */
            y = 0;
            for(i = 0; i < nd0; ++i)
                y = 10*y + s0[i] - '0';
            for(; i < nd; ++i)
                y = 10*y + s0[i+1] - '0';
        }
    }
    bd0 = s2b(s0, nd0, nd, y);
    if (bd0 == NULL)
        goto failed_malloc;

    /* Notation for the comments below.  Write:

         - dv for the absolute value of the number represented by the original
           decimal input string.

         - if we've truncated dv, write tdv for the truncated value.
           Otherwise, set tdv == dv.

         - srv for the quantity rv/2^bc.scale; so srv is the current binary
           approximation to tdv (and dv).  It should be exactly representable
           in an IEEE 754 double.
    */

    for(;;) {

        /* This is the main correction loop for _Py_dg_strtod.

           We've got a decimal value tdv, and a floating-point approximation
           srv=rv/2^bc.scale to tdv.  The aim is to determine whether srv is
           close enough (i.e., within 0.5 ulps) to tdv, and to compute a new
           approximation if not.

           To determine whether srv is close enough to tdv, compute integers
           bd, bb and bs proportional to tdv, srv and 0.5 ulp(srv)
           respectively, and then use integer arithmetic to determine whether
           |tdv - srv| is less than, equal to, or greater than 0.5 ulp(srv).
        */

        bd = Balloc(bd0->k);
        if (bd == NULL) {
            Bfree(bd0);
            goto failed_malloc;
        }
        Bcopy(bd, bd0);
        bb = sd2b(&rv, bc.scale, &bbe);   /* srv = bb * 2^bbe */
        if (bb == NULL) {
            Bfree(bd);
            Bfree(bd0);
            goto failed_malloc;
        }
        /* Record whether lsb of bb is odd, in case we need this
           for the round-to-even step later. */
        odd = bb->x[0] & 1;

        /* tdv = bd * 10**e;  srv = bb * 2**bbe */
        bs = i2b(1);
        if (bs == NULL) {
            Bfree(bb);
            Bfree(bd);
            Bfree(bd0);
            goto failed_malloc;
        }

        if (e >= 0) {
            bb2 = bb5 = 0;
            bd2 = bd5 = e;
        }
        else {
            bb2 = bb5 = -e;
            bd2 = bd5 = 0;
        }
        if (bbe >= 0)
            bb2 += bbe;
        else
            bd2 -= bbe;
        bs2 = bb2;
        bb2++;
        bd2++;

        /* At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1,
           and bs == 1, so:

              tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5)
              srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2)
              0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)

           It follows that:

              M * tdv = bd * 2**bd2 * 5**bd5
              M * srv = bb * 2**bb2 * 5**bb5
              M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5

           for some constant M.  (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
           this fact is not needed below.)
        */

        /* Remove factor of 2**i, where i = min(bb2, bd2, bs2). */
        i = bb2 < bd2 ? bb2 : bd2;
        if (i > bs2)
            i = bs2;
        if (i > 0) {
            bb2 -= i;
            bd2 -= i;
            bs2 -= i;
        }

        /* Scale bb, bd, bs by the appropriate powers of 2 and 5. */
        if (bb5 > 0) {
            bs = pow5mult(bs, bb5);
            if (bs == NULL) {
                Bfree(bb);
                Bfree(bd);
                Bfree(bd0);
                goto failed_malloc;
            }
            bb1 = mult(bs, bb);
            Bfree(bb);
            bb = bb1;
            if (bb == NULL) {
                Bfree(bs);
                Bfree(bd);
                Bfree(bd0);
                goto failed_malloc;
            }
        }
        if (bb2 > 0) {
            bb = lshift(bb, bb2);
            if (bb == NULL) {
                Bfree(bs);
                Bfree(bd);
                Bfree(bd0);
                goto failed_malloc;
            }
        }
        if (bd5 > 0) {
            bd = pow5mult(bd, bd5);
            if (bd == NULL) {
                Bfree(bb);
                Bfree(bs);
                Bfree(bd0);
                goto failed_malloc;
            }
        }
        if (bd2 > 0) {
            bd = lshift(bd, bd2);
            if (bd == NULL) {
                Bfree(bb);
                Bfree(bs);
                Bfree(bd0);
                goto failed_malloc;
            }
        }
        if (bs2 > 0) {
            bs = lshift(bs, bs2);
            if (bs == NULL) {
                Bfree(bb);
                Bfree(bd);
                Bfree(bd0);
                goto failed_malloc;
            }
        }

        /* Now bd, bb and bs are scaled versions of tdv, srv and 0.5 ulp(srv),
           respectively.  Compute the difference |tdv - srv|, and compare
           with 0.5 ulp(srv). */

        delta = diff(bb, bd);
        if (delta == NULL) {
            Bfree(bb);
            Bfree(bs);
            Bfree(bd);
            Bfree(bd0);
            goto failed_malloc;
        }
        dsign = delta->sign;
        delta->sign = 0;
        i = cmp(delta, bs);
        if (bc.nd > nd && i <= 0) {
            if (dsign)
                break;  /* Must use bigcomp(). */

            /* Here rv overestimates the truncated decimal value by at most
               0.5 ulp(rv).  Hence rv either overestimates the true decimal
               value by <= 0.5 ulp(rv), or underestimates it by some small
               amount (< 0.1 ulp(rv)); either way, rv is within 0.5 ulps of
               the true decimal value, so it's possible to exit.

               Exception: if scaled rv is a normal exact power of 2, but not
               DBL_MIN, then rv - 0.5 ulp(rv) takes us all the way down to the
               next double, so the correctly rounded result is either rv - 0.5
               ulp(rv) or rv; in this case, use bigcomp to distinguish. */

            if (!word1(&rv) && !(word0(&rv) & Bndry_mask)) {
                /* rv can't be 0, since it's an overestimate for some
                   nonzero value.  So rv is a normal power of 2. */
                j = (int)(word0(&rv) & Exp_mask) >> Exp_shift;
                /* rv / 2^bc.scale = 2^(j - 1023 - bc.scale); use bigcomp if
                   rv / 2^bc.scale >= 2^-1021. */
                if (j - bc.scale >= 2) {
                    dval(&rv) -= 0.5 * sulp(&rv, &bc);
                    break; /* Use bigcomp. */
                }
            }

            {
                bc.nd = nd;
                i = -1; /* Discarded digits make delta smaller. */
            }
        }

        if (i < 0) {
            /* Error is less than half an ulp -- check for
             * special case of mantissa a power of two.
             */
            if (dsign || word1(&rv) || word0(&rv) & Bndry_mask
                || (word0(&rv) & Exp_mask) <= (2*P+1)*Exp_msk1
                ) {
                break;
            }
            if (!delta->x[0] && delta->wds <= 1) {
                /* exact result */
                break;
            }
            delta = lshift(delta,Log2P);
            if (delta == NULL) {
                Bfree(bb);
                Bfree(bs);
                Bfree(bd);
                Bfree(bd0);
                goto failed_malloc;
            }
            if (cmp(delta, bs) > 0)
                goto drop_down;
            break;
        }
        if (i == 0) {
            /* exactly half-way between */
            if (dsign) {
                if ((word0(&rv) & Bndry_mask1) == Bndry_mask1
                    &&  word1(&rv) == (
                        (bc.scale &&
                         (y = word0(&rv) & Exp_mask) <= 2*P*Exp_msk1) ?
                        (0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) :
                        0xffffffff)) {
                    /*boundary case -- increment exponent*/
                    word0(&rv) = (word0(&rv) & Exp_mask)
                        + Exp_msk1
                        ;
                    word1(&rv) = 0;
                    dsign = 0;
                    break;
                }
            }
            else if (!(word0(&rv) & Bndry_mask) && !word1(&rv)) {
              drop_down:
                /* boundary case -- decrement exponent */
                if (bc.scale) {
                    L = word0(&rv) & Exp_mask;
                    if (L <= (2*P+1)*Exp_msk1) {
                        if (L > (P+2)*Exp_msk1)
                            /* round even ==> */
                            /* accept rv */
                            break;
                        /* rv = smallest denormal */
                        if (bc.nd > nd)
                            break;
                        /* bb, bd, bs, bd0 and delta are live here */
                        goto undfl_cleanup;
                    }
                }
                L = (word0(&rv) & Exp_mask) - Exp_msk1;
                word0(&rv) = L | Bndry_mask1;
                word1(&rv) = 0xffffffff;
                break;
            }
            if (!odd)
                break;
            if (dsign)
                dval(&rv) += sulp(&rv, &bc);
            else {
                dval(&rv) -= sulp(&rv, &bc);
                if (!dval(&rv)) {
                    if (bc.nd >nd)
                        break;
                    goto undfl_cleanup;
                }
            }
            dsign = 1 - dsign;
            break;
        }
        if ((aadj = ratio(delta, bs)) <= 2.) {
            if (dsign)
                aadj = aadj1 = 1.;
            else if (word1(&rv) || word0(&rv) & Bndry_mask) {
                if (word1(&rv) == Tiny1 && !word0(&rv)) {
                    if (bc.nd >nd)
                        break;
                    goto undfl_cleanup;
                }
                aadj = 1.;
                aadj1 = -1.;
            }
            else {
                /* special case -- power of FLT_RADIX to be */
                /* rounded down... */

                if (aadj < 2./FLT_RADIX)
                    aadj = 1./FLT_RADIX;
                else
                    aadj *= 0.5;
                aadj1 = -aadj;
            }
        }
        else {
            aadj *= 0.5;
            aadj1 = dsign ? aadj : -aadj;
            if (Flt_Rounds == 0)
                aadj1 += 0.5;
        }
        y = word0(&rv) & Exp_mask;

        /* Check for overflow */

        if (y == Exp_msk1*(DBL_MAX_EXP+Bias-1)) {
            dval(&rv0) = dval(&rv);
            word0(&rv) -= P*Exp_msk1;
            adj.d = aadj1 * ulp(&rv);
            dval(&rv) += adj.d;
            if ((word0(&rv) & Exp_mask) >=
                Exp_msk1*(DBL_MAX_EXP+Bias-P)) {
                if (word0(&rv0) == Big0 && word1(&rv0) == Big1) {
                    Bfree(bb);
                    Bfree(bd);
                    Bfree(bs);
                    Bfree(bd0);
                    Bfree(delta);
                    goto ovfl;
                }
                word0(&rv) = Big0;
                word1(&rv) = Big1;
                goto cont;
            }
            else
                word0(&rv) += P*Exp_msk1;
        }
        else {
            if (bc.scale && y <= 2*P*Exp_msk1) {
                if (aadj <= 0x7fffffff) {
                    if ((z = (ULong)aadj) <= 0)
                        z = 1;
                    aadj = z;
                    aadj1 = dsign ? aadj : -aadj;
                }
                dval(&aadj2) = aadj1;
                word0(&aadj2) += (2*P+1)*Exp_msk1 - y;
                aadj1 = dval(&aadj2);
            }
            adj.d = aadj1 * ulp(&rv);
            dval(&rv) += adj.d;
        }
        z = word0(&rv) & Exp_mask;
        if (bc.nd == nd) {
            if (!bc.scale)
                if (y == z) {
                    /* Can we stop now? */
                    L = (Long)aadj;
                    aadj -= L;
                    /* The tolerances below are conservative. */
                    if (dsign || word1(&rv) || word0(&rv) & Bndry_mask) {
                        if (aadj < .4999999 || aadj > .5000001)
                            break;
                    }
                    else if (aadj < .4999999/FLT_RADIX)
                        break;
                }
        }
      cont:
        /* bd0 is kept: the next iteration copies it into a fresh bd. */
        Bfree(bb);
        Bfree(bd);
        Bfree(bs);
        Bfree(delta);
    }
    Bfree(bb);
    Bfree(bd);
    Bfree(bs);
    Bfree(bd0);
    Bfree(delta);
    if (bc.nd > nd) {
        error = bigcomp(&rv, s0, &bc);
        if (error)
            goto failed_malloc;
    }

    if (bc.scale) {
        word0(&rv0) = Exp_1 - 2*P*Exp_msk1;
        word1(&rv0) = 0;
        dval(&rv) *= dval(&rv0);
    }

  ret:
    return sign ? -dval(&rv) : dval(&rv);

  parse_error:
    return 0.0;

  failed_malloc:
    errno = ENOMEM;
    return -1.0;

  undfl_cleanup:
    /* Underflow detected inside the correction loop: release the loop's
       Bigints (mirrors the frees done before the in-loop 'goto ovfl'),
       then fall through to the common underflow return. */
    Bfree(bb);
    Bfree(bd);
    Bfree(bs);
    Bfree(bd0);
    Bfree(delta);

  undfl:
    return sign ? -0.0 : 0.0;

  ovfl:
    errno = ERANGE;
    /* Can't trust HUGE_VAL */
    word0(&rv) = Exp_mask;
    word1(&rv) = 0;
    return sign ? -dval(&rv) : dval(&rv);

}
/* Allocate a character buffer able to hold more than i bytes, carved out
 * of a Bigint obtained from Balloc.
 *
 * The smallest Balloc size class whose payload exceeds i is selected by
 * doubling a candidate size, starting at sizeof(ULong).  That size class
 * is stored in the first int of the Bigint so that _Py_dg_freedtoa can
 * recover it later, and the returned pointer refers to the bytes just
 * after that int.
 *
 * Returns NULL if Balloc fails.
 */
static char *
rv_alloc(int i)
{
    int payload = sizeof(ULong);
    int size_class = 0;
    int *header;

    while (sizeof(Bigint) - sizeof(ULong) - sizeof(int) + payload
           <= (unsigned)i) {
        size_class++;
        payload <<= 1;
    }

    header = (int*)Balloc(size_class);
    if (header == NULL)
        return NULL;

    *header = size_class;
    return (char *)(header + 1);
}
/* Copy the NUL-terminated string s into a fresh buffer of rv_alloc(n).
 *
 * If rve is non-NULL, *rve is set to the terminating NUL of the copy.
 * Returns the new buffer, or NULL if rv_alloc fails.
 */
static char *
nrv_alloc(char *s, char **rve, int n)
{
    char *copy = rv_alloc(n);
    char *out;

    if (copy == NULL)
        return NULL;

    /* Copy up to and including the terminator; 'out' stops on it. */
    for (out = copy; (*out = *s) != '\0'; out++, s++)
        ;

    if (rve)
        *rve = out;
    return copy;
}
/* freedtoa(s) must be used to free values s returned by dtoa
* when MULTIPLE_THREADS is #defined. It should be used in all cases,
* but for consistency with earlier versions of dtoa, it is optional
* when MULTIPLE_THREADS is not defined.
*/
/* Release a string previously handed out by rv_alloc / nrv_alloc.
 *
 * The int immediately before s holds the Balloc size class saved by
 * rv_alloc; the Bigint's k and maxwds fields are rebuilt from it before
 * the block is given back through Bfree.
 */
void
_Py_dg_freedtoa(char *s)
{
    int *header = (int *)s - 1;
    Bigint *block = (Bigint *)header;
    int size_class = *(int*)block;

    block->k = size_class;
    block->maxwds = 1 << size_class;
    Bfree(block);
}
/* dtoa for IEEE arithmetic (dmg): convert double to ASCII string.
*
* Inspired by "How to Print Floating-Point Numbers Accurately" by
* Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 112-126].
*
* Modifications:
* 1. Rather than iterating, we use a simple numeric overestimate
* to determine k = floor(log10(d)). We scale relevant
* quantities using O(log2(k)) rather than O(k) multiplications.
* 2. For some modes > 2 (corresponding to ecvt and fcvt), we don't
* try to generate digits strictly left to right. Instead, we
* compute with fewer bits and propagate the carry if necessary
* when rounding the final digit up. This is often faster.
* 3. Under the assumption that input will be rounded nearest,
* mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22.
* That is, we allow equality in stopping tests when the
* round-nearest rule will give the same floating-point value
* as would satisfaction of the stopping test with strict
* inequality.
* 4. We remove common factors of powers of 2 from relevant
* quantities.
* 5. When converting floating-point integers less than 1e16,
* we use floating-point arithmetic rather than resorting
* to multiple-precision integers.
* 6. When asked to produce fewer than 15 digits, we first try
* to get by with floating-point arithmetic; we resort to
* multiple-precision integer arithmetic only if we cannot
* guarantee that the floating-point calculation has given
* the correctly rounded result. For k requested digits and
* "uniformly" distributed input, the probability is
* something like 10^(k-15) that we must resort to the Long
* calculation.
*/
/* Additional notes (METD): (1) returns NULL on failure. (2) to avoid memory
leakage, a successful call to _Py_dg_dtoa should always be matched by a
call to _Py_dg_freedtoa. */
/* _Py_dg_dtoa: produce the decimal digit string for the double dd.
 *
 *   dd      - value to convert.
 *   mode,
 *   ndigits - select the conversion style; see the table inside the body.
 *   decpt   - out: 9999 for Infinity/NaN, 1 for zero, otherwise k + 1
 *             where k is the decimal exponent computed below.
 *   sign    - out: 1 if the sign bit of dd is set, else 0.
 *   rve     - optional out: if non-NULL, receives a pointer to the
 *             terminating NUL of the returned string.
 *
 * Returns a buffer obtained from rv_alloc/nrv_alloc, or NULL when an
 * allocation fails; on that path every Bigint still held and the
 * partially filled buffer are released first.
 */
char *
_Py_dg_dtoa(double dd, int mode, int ndigits,
int *decpt, int *sign, char **rve)
{
/* Arguments ndigits, decpt, sign are similar to those
of ecvt and fcvt; trailing zeros are suppressed from
the returned string. If not null, *rve is set to point
to the end of the return value. If d is +-Infinity or NaN,
then *decpt is set to 9999.
mode:
0 ==> shortest string that yields d when read in
and rounded to nearest.
1 ==> like 0, but with Steele & White stopping rule;
e.g. with IEEE P754 arithmetic , mode 0 gives
1e23 whereas mode 1 gives 9.999999999999999e22.
2 ==> max(1,ndigits) significant digits. This gives a
return value similar to that of ecvt, except
that trailing zeros are suppressed.
3 ==> through ndigits past the decimal point. This
gives a return value similar to that from fcvt,
except that trailing zeros are suppressed, and
ndigits can be negative.
4,5 ==> similar to 2 and 3, respectively, but (in
round-nearest mode) with the tests of mode 0 to
possibly return a shorter string that rounds to d.
With IEEE arithmetic and compilation with
-DHonor_FLT_ROUNDS, modes 4 and 5 behave the same
as modes 2 and 3 when FLT_ROUNDS != 1.
6-9 ==> Debugging modes similar to mode - 4: don't try
fast floating-point estimate (if applicable).
Values of mode other than 0-9 are treated as mode 0.
Sufficient space is allocated to the return value
to hold the suppressed trailing zeros.
*/
int bbits, b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1,
j, j1, k, k0, k_check, leftright, m2, m5, s2, s5,
spec_case, try_quick;
Long L;
int denorm;
ULong x;
Bigint *b, *b1, *delta, *mlo, *mhi, *S;
U d2, eps, u;
double ds;
char *s, *s0;
/* set pointers to NULL, to silence gcc compiler warnings and make
cleanup easier on error */
mlo = mhi = S = 0;
s0 = 0;
u.d = dd;
if (word0(&u) & Sign_bit) {
/* set sign for everything, including 0's and NaNs */
*sign = 1;
word0(&u) &= ~Sign_bit; /* clear sign bit */
}
else
*sign = 0;
/* quick return for Infinities, NaNs and zeros */
if ((word0(&u) & Exp_mask) == Exp_mask)
{
/* Infinity or NaN */
*decpt = 9999;
if (!word1(&u) && !(word0(&u) & 0xfffff))
return nrv_alloc("Infinity", rve, 8);
return nrv_alloc("NaN", rve, 3);
}
if (!dval(&u)) {
*decpt = 1;
return nrv_alloc("0", rve, 1);
}
/* compute k = floor(log10(d)). The computation may leave k
one too large, but should never leave k too small. */
b = d2b(&u, &be, &bbits);
if (b == NULL)
goto failed_malloc;
if ((i = (int)(word0(&u) >> Exp_shift1 & (Exp_mask>>Exp_shift1)))) {
dval(&d2) = dval(&u);
word0(&d2) &= Frac_mask1;
word0(&d2) |= Exp_11;
/* log(x) ~=~ log(1.5) + (x-1.5)/1.5
* log10(x) = log(x) / log(10)
* ~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10))
* log10(d) = (i-Bias)*log(2)/log(10) + log10(d2)
*
* This suggests computing an approximation k to log10(d) by
*
* k = (i - Bias)*0.301029995663981
* + ( (d2-1.5)*0.289529654602168 + 0.176091259055681 );
*
* We want k to be too large rather than too small.
* The error in the first-order Taylor series approximation
* is in our favor, so we just round up the constant enough
* to compensate for any error in the multiplication of
* (i - Bias) by 0.301029995663981; since |i - Bias| <= 1077,
* and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14,
* adding 1e-13 to the constant term more than suffices.
* Hence we adjust the constant term to 0.1760912590558.
* (We could get a more accurate k by invoking log10,
* but this is probably not worthwhile.)
*/
i -= Bias;
denorm = 0;
}
else {
/* d is denormalized */
i = bbits + be + (Bias + (P-1) - 1);
x = i > 32 ? word0(&u) << (64 - i) | word1(&u) >> (i - 32)
: word1(&u) << (32 - i);
dval(&d2) = x;
word0(&d2) -= 31*Exp_msk1; /* adjust exponent */
i -= (Bias + (P-1) - 1) + 1;
denorm = 1;
}
ds = (dval(&d2)-1.5)*0.289529654602168 + 0.1760912590558 +
i*0.301029995663981;
k = (int)ds;
if (ds < 0. && ds != k)
k--; /* want k = floor(ds) */
/* k_check stays 1 when the estimate of k could not be verified against
the tens[] table here; it is then re-checked further down. */
k_check = 1;
if (k >= 0 && k <= Ten_pmax) {
if (dval(&u) < tens[k])
k--;
k_check = 0;
}
/* Derive the powers of 2 (b2, s2) and of 5 (b5, s5) that are applied to
the numerator b and the denominator S further down. */
j = bbits - i - 1;
if (j >= 0) {
b2 = 0;
s2 = j;
}
else {
b2 = -j;
s2 = 0;
}
if (k >= 0) {
b5 = 0;
s5 = k;
s2 += k;
}
else {
b2 -= k;
b5 = -k;
s5 = 0;
}
if (mode < 0 || mode > 9)
mode = 0;
try_quick = 1;
if (mode > 5) {
mode -= 4;
try_quick = 0;
}
leftright = 1;
ilim = ilim1 = -1; /* Values for cases 0 and 1; done here to */
/* silence erroneous "gcc -Wall" warning. */
/* In every case i ends up as the size passed to rv_alloc below. */
switch(mode) {
case 0:
case 1:
i = 18;
ndigits = 0;
break;
case 2:
leftright = 0;
/* no break */
case 4:
if (ndigits <= 0)
ndigits = 1;
ilim = ilim1 = i = ndigits;
break;
case 3:
leftright = 0;
/* no break */
case 5:
i = ndigits + k + 1;
ilim = i;
ilim1 = i - 1;
if (i <= 0)
i = 1;
}
s0 = rv_alloc(i);
if (s0 == NULL)
goto failed_malloc;
s = s0;
if (ilim >= 0 && ilim <= Quick_max && try_quick) {
/* Try to get by with floating-point arithmetic. */
/* d2, k0 and ilim0 save the state that fast_failed restores. */
i = 0;
dval(&d2) = dval(&u);
k0 = k;
ilim0 = ilim;
ieps = 2; /* conservative */
if (k > 0) {
ds = tens[k&0xf];
j = k >> 4;
if (j & Bletch) {
/* prevent overflows */
j &= Bletch - 1;
dval(&u) /= bigtens[n_bigtens-1];
ieps++;
}
for(; j; j >>= 1, i++)
if (j & 1) {
ieps++;
ds *= bigtens[i];
}
dval(&u) /= ds;
}
else if ((j1 = -k)) {
dval(&u) *= tens[j1 & 0xf];
for(j = j1 >> 4; j; j >>= 1, i++)
if (j & 1) {
ieps++;
dval(&u) *= bigtens[i];
}
}
if (k_check && dval(&u) < 1. && ilim > 0) {
if (ilim1 <= 0)
goto fast_failed;
ilim = ilim1;
k--;
dval(&u) *= 10.;
ieps++;
}
dval(&eps) = ieps*dval(&u) + 7.;
word0(&eps) -= (P-1)*Exp_msk1;
if (ilim == 0) {
S = mhi = 0;
dval(&u) -= 5.;
if (dval(&u) > dval(&eps))
goto one_digit;
if (dval(&u) < -dval(&eps))
goto no_digits;
goto fast_failed;
}
if (leftright) {
/* Use Steele & White method of only
* generating digits needed.
*/
dval(&eps) = 0.5/tens[ilim-1] - dval(&eps);
for(i = 0;;) {
L = (Long)dval(&u);
dval(&u) -= L;
*s++ = '0' + (int)L;
if (dval(&u) < dval(&eps))
goto ret1;
if (1. - dval(&u) < dval(&eps))
goto bump_up;
if (++i >= ilim)
break;
dval(&eps) *= 10.;
dval(&u) *= 10.;
}
}
else {
/* Generate ilim digits, then fix them up. */
dval(&eps) *= tens[ilim-1];
for(i = 1;; i++, dval(&u) *= 10.) {
L = (Long)(dval(&u));
if (!(dval(&u) -= L))
ilim = i;
*s++ = '0' + (int)L;
if (i == ilim) {
if (dval(&u) > 0.5 + dval(&eps))
goto bump_up;
else if (dval(&u) < 0.5 - dval(&eps)) {
while(*--s == '0');
s++;
goto ret1;
}
break;
}
}
}
fast_failed:
/* The floating-point attempt was inconclusive: rewind the output
pointer and restore u, k and ilim to their saved values. */
s = s0;
dval(&u) = dval(&d2);
k = k0;
ilim = ilim0;
}
/* Do we have a "small" integer? */
if (be >= 0 && k <= Int_max) {
/* Yes. */
ds = tens[k];
if (ndigits < 0 && ilim <= 0) {
S = mhi = 0;
if (ilim < 0 || dval(&u) <= 5*ds)
goto no_digits;
goto one_digit;
}
for(i = 1;; i++, dval(&u) *= 10.) {
L = (Long)(dval(&u) / ds);
dval(&u) -= L*ds;
*s++ = '0' + (int)L;
if (!dval(&u)) {
break;
}
if (i == ilim) {
dval(&u) += dval(&u);
if (dval(&u) > ds || (dval(&u) == ds && L & 1)) {
/* bump_up is also entered by goto from the floating-point attempt
above: propagate a carry through trailing '9's. */
bump_up:
while(*--s == '9')
if (s == s0) {
k++;
*s = '0';
break;
}
++*s++;
}
break;
}
}
goto ret1;
}
/* General case: multiple-precision arithmetic on b (numerator) and S
(denominator); mlo and mhi are only set up when leftright is nonzero. */
m2 = b2;
m5 = b5;
if (leftright) {
i =
denorm ? be + (Bias + (P-1) - 1 + 1) :
1 + P - bbits;
b2 += i;
s2 += i;
mhi = i2b(1);
if (mhi == NULL)
goto failed_malloc;
}
if (m2 > 0 && s2 > 0) {
i = m2 < s2 ? m2 : s2;
b2 -= i;
m2 -= i;
s2 -= i;
}
if (b5 > 0) {
if (leftright) {
if (m5 > 0) {
mhi = pow5mult(mhi, m5);
if (mhi == NULL)
goto failed_malloc;
b1 = mult(mhi, b);
Bfree(b);
b = b1;
if (b == NULL)
goto failed_malloc;
}
if ((j = b5 - m5)) {
b = pow5mult(b, j);
if (b == NULL)
goto failed_malloc;
}
}
else {
b = pow5mult(b, b5);
if (b == NULL)
goto failed_malloc;
}
}
S = i2b(1);
if (S == NULL)
goto failed_malloc;
if (s5 > 0) {
S = pow5mult(S, s5);
if (S == NULL)
goto failed_malloc;
}
/* Check for special case that d is a normalized power of 2. */
spec_case = 0;
if ((mode < 2 || leftright)
) {
if (!word1(&u) && !(word0(&u) & Bndry_mask)
&& word0(&u) & (Exp_mask & ~Exp_msk1)
) {
/* The special case */
b2 += Log2P;
s2 += Log2P;
spec_case = 1;
}
}
/* Arrange for convenient computation of quotients:
* shift left if necessary so divisor has 4 leading 0 bits.
*
* Perhaps we should just compute leading 28 bits of S once
* and for all and pass them and a shift to quorem, so it
* can do shifts and ors to compute the numerator for q.
*/
#define iInc 28
i = dshift(S, s2);
b2 += i;
m2 += i;
s2 += i;
if (b2 > 0) {
b = lshift(b, b2);
if (b == NULL)
goto failed_malloc;
}
if (s2 > 0) {
S = lshift(S, s2);
if (S == NULL)
goto failed_malloc;
}
if (k_check) {
if (cmp(b,S) < 0) {
k--;
b = multadd(b, 10, 0); /* we botched the k estimate */
if (b == NULL)
goto failed_malloc;
if (leftright) {
mhi = multadd(mhi, 10, 0);
if (mhi == NULL)
goto failed_malloc;
}
ilim = ilim1;
}
}
if (ilim <= 0 && (mode == 3 || mode == 5)) {
if (ilim < 0) {
/* no digits, fcvt style */
no_digits:
k = -1 - ndigits;
goto ret;
}
else {
S = multadd(S, 5, 0);
if (S == NULL)
goto failed_malloc;
if (cmp(b, S) <= 0)
goto no_digits;
}
one_digit:
*s++ = '1';
k++;
goto ret;
}
if (leftright) {
if (m2 > 0) {
mhi = lshift(mhi, m2);
if (mhi == NULL)
goto failed_malloc;
}
/* Compute mlo -- check for special case
* that d is a normalized power of 2.
*/
mlo = mhi;
if (spec_case) {
mhi = Balloc(mhi->k);
if (mhi == NULL)
goto failed_malloc;
Bcopy(mhi, mlo);
mhi = lshift(mhi, Log2P);
if (mhi == NULL)
goto failed_malloc;
}
for(i = 1;;i++) {
dig = quorem(b,S) + '0';
/* Do we yet have the shortest decimal string
* that will round to d?
*/
j = cmp(b, mlo);
delta = diff(S, mhi);
if (delta == NULL)
goto failed_malloc;
j1 = delta->sign ? 1 : cmp(b, delta);
Bfree(delta);
if (j1 == 0 && mode != 1 && !(word1(&u) & 1)
) {
if (dig == '9')
goto round_9_up;
if (j > 0)
dig++;
*s++ = dig;
goto ret;
}
if (j < 0 || (j == 0 && mode != 1
&& !(word1(&u) & 1)
)) {
if (!b->x[0] && b->wds <= 1) {
goto accept_dig;
}
if (j1 > 0) {
b = lshift(b, 1);
if (b == NULL)
goto failed_malloc;
j1 = cmp(b, S);
if ((j1 > 0 || (j1 == 0 && dig & 1))
&& dig++ == '9')
goto round_9_up;
}
accept_dig:
*s++ = dig;
goto ret;
}
if (j1 > 0) {
if (dig == '9') { /* possible if i == 1 */
round_9_up:
*s++ = '9';
goto roundoff;
}
*s++ = dig + 1;
goto ret;
}
*s++ = dig;
if (i == ilim)
break;
b = multadd(b, 10, 0);
if (b == NULL)
goto failed_malloc;
/* mlo and mhi may be the same object; multiply it only once then. */
if (mlo == mhi) {
mlo = mhi = multadd(mhi, 10, 0);
if (mlo == NULL)
goto failed_malloc;
}
else {
mlo = multadd(mlo, 10, 0);
if (mlo == NULL)
goto failed_malloc;
mhi = multadd(mhi, 10, 0);
if (mhi == NULL)
goto failed_malloc;
}
}
}
else
for(i = 1;; i++) {
*s++ = dig = quorem(b,S) + '0';
if (!b->x[0] && b->wds <= 1) {
goto ret;
}
if (i >= ilim)
break;
b = multadd(b, 10, 0);
if (b == NULL)
goto failed_malloc;
}
/* Round off last digit */
b = lshift(b, 1);
if (b == NULL)
goto failed_malloc;
j = cmp(b, S);
if (j > 0 || (j == 0 && dig & 1)) {
roundoff:
while(*--s == '9')
if (s == s0) {
k++;
*s++ = '1';
goto ret;
}
++*s++;
}
else {
while(*--s == '0');
s++;
}
/* Normal exit of the multiple-precision path: release S, mlo and mhi,
then fall through to ret1. */
ret:
Bfree(S);
if (mhi) {
/* mlo may alias mhi; free it separately only when it is distinct */
if (mlo && mlo != mhi)
Bfree(mlo);
Bfree(mhi);
}
/* Exit used directly by the floating-point paths, which hold only b:
terminate the string and publish decpt and rve. */
ret1:
Bfree(b);
*s = 0;
*decpt = k + 1;
if (rve)
*rve = s;
return s0;
/* Allocation failure: free whatever is still held.  Pointers that were
never allocated are still 0 from the initialisation at the top. */
failed_malloc:
if (S)
Bfree(S);
if (mlo && mlo != mhi)
Bfree(mlo);
if (mhi)
Bfree(mhi);
if (b)
Bfree(b);
if (s0)
_Py_dg_freedtoa(s0);
return NULL;
}
#ifdef __cplusplus
}
#endif
#endif /* PY_NO_SHORT_FLOAT_REPR */
// This file is originally from CPython 2.7, with modifications for Pyston
/***********************************************************************/
/* Implements the string (as opposed to unicode) version of the
built-in formatters for string, int, float. That is, the versions
of int.__format__, etc., that take and return string objects */
#include "Python.h"
// Pyston change: had to change this path
#include "../2.7_Objects/stringlib/stringdefs.h"
/* Names given to the formatter entry points for the str (bytes) flavour.
   NOTE(review): presumably stringlib/formatter.h, included just below, is
   a template that defines one function per FORMAT_* macro -- confirm
   against that header. */
#define FORMAT_STRING _PyBytes_FormatAdvanced
#define FORMAT_LONG _PyLong_FormatAdvanced
#define FORMAT_INT _PyInt_FormatAdvanced
#define FORMAT_FLOAT _PyFloat_FormatAdvanced
/* The complex formatter is only named when WITHOUT_COMPLEX is not defined. */
#ifndef WITHOUT_COMPLEX
#define FORMAT_COMPLEX _PyComplex_FormatAdvanced
#endif
// Pyston change: had to change this path
#include "../2.7_Objects/stringlib/formatter.h"
// This file is originally from CPython 2.7, with modifications for Pyston
/* -*- Mode: C; c-file-style: "python" -*- */
#include <Python.h>
#include <locale.h>
/* Case-insensitive string match used for nan and inf detection; t should be
lower-case. Returns 1 for a successful match, 0 otherwise. */
/* Test whether s begins with t, ignoring the case of s.
 *
 * t must already be lower-case.  Each character of s is passed through
 * Py_TOLOWER and compared with the matching character of t until t is
 * exhausted.  Returns 1 when all of t matched, 0 on the first mismatch.
 */
static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t; s++, t++) {
        if (Py_TOLOWER(*s) != *t)
            return 0;
    }
    return 1;
}
/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
"infinity", with an optional leading sign of "+" or "-". On success,
return the NaN or Infinity as a double and set *endptr to point just beyond
the successfully parsed portion of the string. On failure, return -1.0 and
set *endptr to point to the start of the string. */
/* Recognise "inf", "infinity" or (when Py_NAN is available) "nan" at p,
 * case-insensitively and with an optional leading '+' or '-'.
 *
 * On a match, *endptr is set just past the recognised text and the
 * corresponding signed value is returned.  Otherwise *endptr is set to p
 * and -1.0 is returned.  endptr must not be NULL: it is always written.
 */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int is_negative = (*cursor == '-');

    /* Step over an optional sign. */
    if (is_negative || *cursor == '+')
        cursor++;

    if (case_insensitive_match(cursor, "inf")) {
        cursor += 3;
        /* "infinity" is accepted as the long spelling of "inf" */
        if (case_insensitive_match(cursor, "inity"))
            cursor += 5;
        *endptr = (char *)cursor;
        return is_negative ? -Py_HUGE_VAL : Py_HUGE_VAL;
    }
#ifdef Py_NAN
    if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        *endptr = (char *)cursor;
        return is_negative ? -Py_NAN : Py_NAN;
    }
#endif

    /* Nothing recognised: report failure at the start of the string. */
    *endptr = (char *)p;
    return -1.0;
}
/**
* PyOS_ascii_strtod:
* @nptr: the string to convert to a numeric value.
* @endptr: if non-%NULL, it returns the character after
* the last character used in the conversion.
*
* Converts a string to a double value.
* This function behaves like the standard strtod() function
* does in the C locale. It does this without actually
* changing the current locale, since that would not be
* thread-safe.
*
* This function is typically used when reading configuration
* files or other non-user input that should be locale independent.
* To handle input from the user you should normally use the
* locale-sensitive system strtod() function.
*
* If the correct value would cause overflow, plus or minus %HUGE_VAL
* is returned (according to the sign of the value), and %ERANGE is
* stored in %errno. If the correct value would cause underflow,
* zero is returned and %ERANGE is stored in %errno.
* If memory allocation fails, %ENOMEM is stored in %errno.
*
* This function resets %errno before calling strtod() so that
* you can reliably detect overflow and underflow.
*
* Return value: the converted double value.
**/
#ifndef PY_NO_SHORT_FLOAT_REPR
double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    /* Locale-independent strtod built on _Py_dg_strtod.  If the numeric
       parser consumes nothing, the input is retried as an inf/nan
       spelling via _Py_parse_inf_or_nan. */
    double parsed;
    _Py_SET_53BIT_PRECISION_HEADER;

    assert(nptr != NULL);

    /* Clear errno first so callers can tell a genuine zero result from
       an underflow. */
    errno = 0;

    _Py_SET_53BIT_PRECISION_START;
    parsed = _Py_dg_strtod(nptr, endptr);
    _Py_SET_53BIT_PRECISION_END;

    if (*endptr != nptr) {
        return parsed;
    }
    /* nothing was parsed as a number; maybe it is an inf or nan */
    return _Py_parse_inf_or_nan(nptr, endptr);
}
#else
/*
Use system strtod; since strtod is locale aware, we may
have to first fix the decimal separator.
Note that unlike _Py_dg_strtod, the system strtod may not always give
correctly rounded results.
*/
double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    /* Fallback implementation on top of the system strtod().

       nptr   - NUL-terminated input; must not be NULL.
       endptr - must not be NULL; receives the position just past the
                parsed text, or nptr if nothing could be parsed.

       Returns the parsed value.  On a syntax error returns -1.0 with
       errno = EINVAL; if the temporary copy cannot be allocated returns
       -1.0 with errno = ENOMEM.  In both cases *endptr == nptr. */
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into 'copy' while decimal_point_pos points
               into the caller's string, so the two must be compared as
               offsets from their respective starts, never as raw
               pointers.  Past the decimal point, the copy is longer
               than the input by (decimal_point_len - 1) bytes. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
#endif
/* PyOS_ascii_strtod is DEPRECATED in Python 2.7 and 3.1 */
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    /* Deprecated wrapper around _PyOS_ascii_strtod that additionally
       skips leading whitespace.

       nptr   - NUL-terminated input.
       endptr - may be NULL; otherwise receives the position just past
                the parsed text, or nptr when nothing was converted.

       Returns the parsed value, or -1.0 if the deprecation warning was
       turned into an exception. */
    char *fail_pos;
    const char *p;
    double x;

    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "PyOS_ascii_strtod and PyOS_ascii_atof are "
                     "deprecated.  Use PyOS_string_to_double "
                     "instead.", 1) < 0) {
        /* No conversion happened: give *endptr a defined value so the
           caller never reads an uninitialised pointer. */
        if (endptr)
            *endptr = (char *)nptr;
        return -1.0;
    }

    /* _PyOS_ascii_strtod already does everything that we want,
       except that it doesn't parse leading whitespace */
    p = nptr;
    while (Py_ISSPACE(*p))
        p++;
    x = _PyOS_ascii_strtod(p, &fail_pos);
    /* on failure, report the original start, not the post-whitespace one */
    if (fail_pos == p)
        fail_pos = (char *)nptr;
    if (endptr)
        *endptr = (char *)fail_pos;
    return x;
}
/* PyOS_ascii_atof is DEPRECATED in Python 2.7 and 3.1 */
double
PyOS_ascii_atof(const char *nptr)
{
    /* Same conversion as PyOS_ascii_strtod, for callers that do not
       need to know where parsing stopped. */
    char **no_endptr = NULL;

    return PyOS_ascii_strtod(nptr, no_endptr);
}
/* PyOS_string_to_double is the recommended replacement for the deprecated
PyOS_ascii_strtod and PyOS_ascii_atof functions. It converts a
null-terminated byte string s (interpreted as a string of ASCII characters)
to a float. The string should not have leading or trailing whitespace (in
contrast, PyOS_ascii_strtod allows leading whitespace but not trailing
whitespace). The conversion is independent of the current locale.
If endptr is NULL, try to convert the whole string. Raise ValueError and
return -1.0 if the string is not a valid representation of a floating-point
number.
If endptr is non-NULL, try to convert as much of the string as possible.
If no initial segment of the string is the valid representation of a
floating-point number then *endptr is set to point to the beginning of the
string, -1.0 is returned and again ValueError is raised.
On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
exception is raised. Otherwise, overflow_exception should point to
a Python exception; this exception will be raised, -1.0 will be returned,
and *endptr will point just past the end of the converted value.
If any other failure occurs (for example lack of memory), -1.0 is returned
and the appropriate Python exception will have been set.
*/
double
PyOS_string_to_double(const char *s,
                      char **endptr,
                      PyObject *overflow_exception)
{
    /* Convert s via _PyOS_ascii_strtod and translate the outcome into
       either a value or a Python exception.  -1.0 is returned whenever
       an exception has been set.  If endptr is non-NULL it always
       receives the stop position. */
    double parsed, result = -1.0;
    char *stop;

    errno = 0;
    PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
    parsed = _PyOS_ascii_strtod(s, &stop);
    PyFPE_END_PROTECT(parsed)

    if (errno == ENOMEM) {
        PyErr_NoMemory();
        stop = (char *)s;
    }
    else if (stop == s || (endptr == NULL && *stop != '\0')) {
        /* Nothing was parsed, or the caller asked for a whole-string
           conversion and trailing characters remain. */
        PyErr_Format(PyExc_ValueError,
                     "could not convert string to float: "
                     "%.200s", s);
    }
    else if (overflow_exception && errno == ERANGE && fabs(parsed) >= 1.0) {
        /* ERANGE with a large magnitude means overflow (not underflow) */
        PyErr_Format(overflow_exception,
                     "value too large to convert to float: "
                     "%.200s", s);
    }
    else {
        result = parsed;
    }

    if (endptr != NULL) {
        *endptr = stop;
    }
    return result;
}
/* Given a string that may have a decimal point in the current
locale, change it back to a dot. Since the string cannot get
longer, no need for a maximum buffer size parameter. */
Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char* buffer)
{
    /* Rewrite, in place, the first occurrence of the current locale's
       decimal separator (looked for right after an optional sign and a
       run of digits) as a single '.'. */
    const char *decimal_point = localeconv()->decimal_point;
    size_t decimal_point_len;
    char *cursor = buffer;

    /* locale already uses "." -- nothing to rewrite */
    if (decimal_point[0] == '.' && decimal_point[1] == 0)
        return;

    decimal_point_len = strlen(decimal_point);

    if (*cursor == '+' || *cursor == '-')
        cursor++;
    while (Py_ISDIGIT(*cursor))
        cursor++;

    if (strncmp(cursor, decimal_point, decimal_point_len) != 0)
        return;

    *cursor++ = '.';
    if (decimal_point_len > 1) {
        /* multi-byte separator: close the gap left behind the '.' */
        size_t rest_len = strlen(cursor + (decimal_point_len - 1));
        memmove(cursor,
                cursor + (decimal_point_len - 1),
                rest_len);
        cursor[rest_len] = 0;
    }
}
/* From the C99 standard, section 7.19.6:
The exponent always contains at least two digits, and only as many more digits
as necessary to represent the exponent.
*/
#define MIN_EXPONENT_DIGITS 2
/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
in length. */
Py_LOCAL_INLINE(void)
ensure_minimum_exponent_length(char* buffer, size_t buf_size)
{
    /* Normalise a signed exponent ("e+5", "E-007", ...) in buffer so it
       has exactly MIN_EXPONENT_DIGITS digits when possible: surplus
       leading zeros are removed, and short exponents are zero-padded
       if the buffer has room.  Strings without a signed exponent are
       left untouched. */
    char *exp_mark = strpbrk(buffer, "eE");
    char *digits, *scan;
    int total_digits, kept_digits;
    int leading_zeros = 0;

    if (exp_mark == NULL ||
        !(exp_mark[1] == '-' || exp_mark[1] == '+'))
        return;

    /* Step over the exponent marker and its sign. */
    digits = exp_mark + 2;
    scan = digits;

    /* Count the leading zeros, then run to the end of the digits. */
    while (*scan == '0') {
        ++leading_zeros;
        ++scan;
    }
    while (*scan && Py_ISDIGIT(*scan))
        ++scan;
    total_digits = (int)(scan - digits);

    if (total_digits == MIN_EXPONENT_DIGITS) {
        /* Exactly the required number of digits: done, whatever they
           contain. */
        return;
    }

    if (total_digits > MIN_EXPONENT_DIGITS) {
        int surplus;

        /* Too many digits: drop leading zeros, but never go below the
           minimum width. */
        kept_digits = total_digits - leading_zeros;
        if (kept_digits < MIN_EXPONENT_DIGITS)
            kept_digits = MIN_EXPONENT_DIGITS;
        surplus = total_digits - kept_digits;
        assert(surplus >= 0);

        /* One extra byte is moved so the terminating 0 byte comes
           along, which sets the new length. */
        memmove(digits, digits + surplus, kept_digits + 1);
    }
    else {
        /* Too few digits: left-pad with zeros if there is room. */
        int zeros = MIN_EXPONENT_DIGITS - total_digits;

        if (digits + zeros + total_digits + 1 < buffer + buf_size) {
            memmove(digits + zeros, digits, total_digits + 1);
            memset(digits, '0', zeros);
        }
    }
}
/* Remove trailing zeros after the decimal point from a numeric string; also
remove the decimal point if all digits following it are zero. The numeric
string must end in '\0', and should not have any leading or trailing
whitespace. Assumes that the decimal point is '.'. */
Py_LOCAL_INLINE(void)
remove_trailing_zeros(char *buffer)
{
    /* Strip, in place, the zeros that end the fractional part of the
       number in buffer, and the '.' itself if no fractional digit is
       left.  Whatever follows the fraction (e.g. an exponent) is kept
       and shifted down. */
    char *cursor = buffer;
    char *fraction_end, *trimmed_end;
    size_t tail_len;

    /* optional sign, then the integer digits */
    if (*cursor == '-' || *cursor == '+')
        ++cursor;
    while (Py_ISDIGIT(*cursor))
        ++cursor;

    /* without a decimal point there is nothing to trim */
    if (*cursor != '.')
        return;
    ++cursor;

    /* fractional digits */
    while (Py_ISDIGIT(*cursor))
        ++cursor;
    fraction_end = cursor;

    /* bytes after the fraction, counting the terminating null byte */
    tail_len = strlen(fraction_end) + 1;

    /* walk back over removable zeros, then over a now-bare point */
    trimmed_end = fraction_end;
    while (trimmed_end[-1] == '0')
        --trimmed_end;
    if (trimmed_end[-1] == '.')
        --trimmed_end;

    memmove(trimmed_end, fraction_end, tail_len);
}
/* Ensure that buffer has a decimal point in it. The decimal point will not
be in the current locale, it will always be '.'. Don't add a decimal point
if an exponent is present. Also, convert to exponential notation where
adding a '.0' would produce too many significant digits (see issue 5864).
Returns a pointer to the fixed buffer, or NULL on failure.
*/
Py_LOCAL_INLINE(char *)
ensure_decimal_point(char* buffer, size_t buf_size, int precision)
{
    /* Make the number in buffer look like a float rather than an int.

       buffer    - NUL-terminated numeric string, edited in place.
       buf_size  - total capacity of buffer in bytes.
       precision - number of significant digits that was requested, or
                   -1 when unknown.

       If there is no '.', ".0" is appended; if there is a '.' with no
       digit after it, "0" is appended.  If the string already has
       'precision' digits and no point or exponent, it is rewritten in
       exponential notation instead (issue 5864).  Strings with an
       exponent are left alone.

       Returns buffer, or NULL if the exponential rewrite does not fit
       in buf_size.  A ".0"/"0" insertion that does not fit is skipped
       silently. */
    int digit_count, insert_count = 0, convert_to_exp = 0;
    const char *chars_to_insert = NULL;
    char *digits_start;

    /* search for the first non-digit character */
    char *p = buffer;
    if (*p == '-' || *p == '+')
        /* Skip leading sign, if present.  I think this could only
           ever be '-', but it can't hurt to check for both. */
        ++p;
    digits_start = p;
    while (*p && Py_ISDIGIT(*p))
        ++p;
    digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);

    if (*p == '.') {
        if (Py_ISDIGIT(*(p+1))) {
            /* Nothing to do, we already have a decimal
               point and a digit after it */
        }
        else {
            /* We have a decimal point, but no following
               digit.  Insert a zero after the decimal. */
            /* can't ever get here via PyOS_double_to_string */
            assert(precision == -1);
            ++p;
            chars_to_insert = "0";
            insert_count = 1;
        }
    }
    else if (!(*p == 'e' || *p == 'E')) {
        /* Don't add ".0" if we have an exponent. */
        if (digit_count == precision) {
            /* issue 5864: don't add a trailing .0 in the case
               where the '%g'-formatted result already has as many
               significant digits as were requested.  Switch to
               exponential notation instead. */
            convert_to_exp = 1;
            /* no exponent, no point, and we shouldn't land here
               for infs and nans, so we must be at the end of the
               string. */
            assert(*p == '\0');
        }
        else {
            assert(precision == -1 || digit_count < precision);
            chars_to_insert = ".0";
            insert_count = 2;
        }
    }
    if (insert_count) {
        size_t buf_len = strlen(buffer);
        if (buf_len + insert_count + 1 >= buf_size) {
            /* If there is not enough room in the buffer
               for the additional text, just skip it.  It's
               not worth generating an error over. */
        }
        else {
            /* open a gap at p (the tail move includes the NUL) */
            memmove(p + insert_count, p,
                    buffer + buf_len - p + 1);
            memcpy(p, chars_to_insert, insert_count);
        }
    }
    if (convert_to_exp) {
        int written;
        size_t buf_avail;
        size_t prefix_len = (size_t)(digits_start - buffer);

        p = digits_start;
        /* insert decimal point */
        assert(digit_count >= 1);
        /* Shifting the digits right by one needs room for the sign,
           the digits, the new '.' and the NUL.  Check before writing
           so that an exactly-full buffer is never overrun. */
        if (prefix_len + (size_t)digit_count + 2 > buf_size)
            return NULL;
        /* moves digits 2..n together with the terminating NUL */
        memmove(p+2, p+1, digit_count);
        p[1] = '.';
        p += digit_count+1;
        assert(p <= buf_size+buffer);
        buf_avail = buf_size+buffer-p;
        if (buf_avail == 0)
            return NULL;
        /* Add exponent.  It's okay to use lower case 'e': we only
           arrive here as a result of using the empty format code or
           repr/str builtins and those never want an upper case 'E' */
        written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
        if (!(0 <= written &&
              written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
            /* output truncated, or something else bad happened */
            return NULL;
        remove_trailing_zeros(buffer);
    }
    return buffer;
}
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
/**
* PyOS_ascii_formatd:
* @buffer: A buffer to place the resulting string in
* @buf_size: The length of the buffer.
* @format: The printf()-style format to use for the
* code to use for converting.
* @d: The #gdouble to convert
*
* Converts a #gdouble to a string, using the '.' as
* decimal point. To format the number you pass in
* a printf()-style format string. Allowed conversion
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
*
* 'Z' is the same as 'g', except it always has a decimal and
* at least one digit after the decimal.
*
* Return value: The pointer to the buffer with the converted string.
* On failure returns NULL but does not set any Python exception.
**/
char *
_PyOS_ascii_formatd(char *buffer,
                   size_t buf_size,
                   const char *format,
                   double d,
                   int precision)
{
    /* Format d into buffer using a printf-style float format, then
       normalise the result: '.' as decimal point and an exponent of
       MIN_EXPONENT_DIGITS digits where possible.

       buffer, buf_size - destination and its capacity.
       format    - must start with '%' and end in one of e, E, f, F, g,
                   G or Z; must not contain ', l or a second %.
       d         - value to format.
       precision - forwarded to ensure_decimal_point for the 'Z' code.

       Returns buffer, or NULL for an unacceptable format or when the
       'Z' fix-up fails.  No Python exception is set here. */
    char format_char;
    size_t format_len = strlen(format);

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Validate the first character before looking at the last one: an
       empty format has no last character, and indexing
       format[format_len - 1] would read before the start of the
       string. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* Reject a single quote, a lowercase l, or a second percent anywhere
       after the leading '%'. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }

    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits than we
       really want. */
    if (format_char == 'Z')
        buffer = ensure_decimal_point(buffer, buf_size, precision);

    return buffer;
}
char *
PyOS_ascii_formatd(char *buffer,
                   size_t buf_size,
                   const char *format,
                   double d)
{
    /* Deprecated public entry point: emit a DeprecationWarning, then
       delegate to _PyOS_ascii_formatd with precision -1.  Returns NULL
       if the warning was raised as an exception. */
    int warn_status = PyErr_WarnEx(PyExc_DeprecationWarning,
                                   "PyOS_ascii_formatd is deprecated, "
                                   "use PyOS_double_to_string instead", 1);

    if (warn_status < 0) {
        return NULL;
    }
    return _PyOS_ascii_formatd(buffer, buf_size, format, d, -1);
}
#ifdef PY_NO_SHORT_FLOAT_REPR
/* The fallback code to use if _Py_dg_dtoa is not available. */
/* Convert val to a newly PyMem_Malloc'd string using printf-style
   formatting (via _PyOS_ascii_formatd).

   val         - the value to convert
   format_code - one of 'e', 'E', 'f', 'F', 'g', 'G' or 'r'
   precision   - printf precision; must be 0 for 'r'
   flags       - bitwise OR of Py_DTSF_SIGN, Py_DTSF_ADD_DOT_0, Py_DTSF_ALT
   type        - if non-NULL, receives Py_DTST_FINITE, Py_DTST_INFINITE
                 or Py_DTST_NAN

   Returns the buffer, or NULL with a Python exception set on a bad
   format_code/precision combination or on allocation failure. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
char format_code,
int precision,
int flags,
int *type)
{
char format[32];
Py_ssize_t bufsize;
char *buf;
int t, exp;
int upper = 0;
/* Validate format_code, and map upper and lower case */
switch (format_code) {
case 'e': /* exponent */
case 'f': /* fixed */
case 'g': /* general */
break;
case 'E':
upper = 1;
format_code = 'e';
break;
case 'F':
upper = 1;
format_code = 'f';
break;
case 'G':
upper = 1;
format_code = 'g';
break;
case 'r': /* repr format */
/* Supplied precision is unused, must be 0. */
if (precision != 0) {
PyErr_BadInternalCall();
return NULL;
}
/* The repr() precision (17 significant decimal digits) is the
minimal number that is guaranteed to have enough precision
so that if the number is read back in the exact same binary
value is recreated. This is true for IEEE floating point
by design, and also happens to work for all other modern
hardware. */
precision = 17;
format_code = 'g';
break;
default:
PyErr_BadInternalCall();
return NULL;
}
/* Here's a quick-and-dirty calculation to figure out how big a buffer
we need. In general, for a finite float we need:
1 byte for each digit of the decimal significand, and
1 for a possible sign
1 for a possible decimal point
2 for a possible [eE][+-]
1 for each digit of the exponent; if we allow 19 digits
total then we're safe up to exponents of 2**63.
1 for the trailing nul byte
This gives a total of 24 + the number of digits in the significand,
and the number of digits in the significand is:
for 'g' format: at most precision, except possibly
when precision == 0, when it's 1.
for 'e' format: precision+1
for 'f' format: precision digits after the point, at least 1
before. To figure out how many digits appear before the point
we have to examine the size of the number. If fabs(val) < 1.0
then there will be only one digit before the point. If
fabs(val) >= 1.0, then there are at most
1+floor(log10(ceiling(fabs(val))))
digits before the point (where the 'ceiling' allows for the
possibility that the rounding rounds the integer part of val
up). A safe upper bound for the above quantity is
1+floor(exp/3), where exp is the unique integer such that 0.5
<= fabs(val)/2**exp < 1.0. This exp can be obtained from
frexp.
So we allow room for precision+1 digits for all formats, plus an
extra floor(exp/3) digits for 'f' format.
*/
if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
/* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
bufsize = 5;
else {
bufsize = 25 + precision;
if (format_code == 'f' && fabs(val) >= 1.0) {
/* only the binary exponent is needed; the mantissa is discarded */
frexp(val, &exp);
bufsize += exp/3;
}
}
buf = PyMem_Malloc(bufsize);
if (buf == NULL) {
PyErr_NoMemory();
return NULL;
}
/* Handle nan and inf. */
if (Py_IS_NAN(val)) {
strcpy(buf, "nan");
t = Py_DTST_NAN;
} else if (Py_IS_INFINITY(val)) {
/* copysign yields 1.0 for +inf and -1.0 for -inf */
if (copysign(1., val) == 1.)
strcpy(buf, "inf");
else
strcpy(buf, "-inf");
t = Py_DTST_INFINITE;
} else {
t = Py_DTST_FINITE;
/* 'Z' is _PyOS_ascii_formatd's private code for "'g' plus a
guaranteed decimal point and following digit" */
if (flags & Py_DTSF_ADD_DOT_0)
format_code = 'Z';
/* build the printf format, e.g. "%#.17g" */
PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
(flags & Py_DTSF_ALT ? "#" : ""), precision,
format_code);
/* NOTE(review): the return value is ignored here, although
_PyOS_ascii_formatd can return NULL -- confirm that this is
acceptable for every format this function can build. */
_PyOS_ascii_formatd(buf, bufsize, format, val, precision);
}
/* Add sign when requested. It's convenient (esp. when formatting
complex numbers) to include a sign even for inf and nan. */
if (flags & Py_DTSF_SIGN && buf[0] != '-') {
size_t len = strlen(buf);
/* the bufsize calculations above should ensure that we've got
space to add a sign */
assert((size_t)bufsize >= len+2);
/* shift the whole string, including its NUL, right by one */
memmove(buf+1, buf, len+1);
buf[0] = '+';
}
if (upper) {
/* Convert to upper case. */
char *p1;
for (p1 = buf; *p1; p1++)
*p1 = Py_TOUPPER(*p1);
}
if (type)
*type = t;
return buf;
}
#else
/* _Py_dg_dtoa is available. */
/* I'm using a lookup table here so that I don't have to invent a non-locale
specific way to convert to uppercase */
/* Indices into lc_float_strings / uc_float_strings. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2
/* The lengths of these are known to the code below, so don't change them */
/* Lower-case spellings, indexed by the OFS_* constants. */
static char *lc_float_strings[] = {
"inf",
"nan",
"e",
};
/* Upper-case spellings, in the same order as lc_float_strings. */
static char *uc_float_strings[] = {
"INF",
"NAN",
"E",
};
/* Convert a double d to a string, and return a PyMem_Malloc'd block of
memory containing the resulting string.
Arguments:
d is the double to be converted
format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
mode is one of '0', '2' or '3', and is completely determined by
format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
precision is the desired precision
always_add_sign is nonzero if a '+' sign should be included for positive
numbers
add_dot_0_if_integer is nonzero if integers in non-exponential form
should have ".0" added. Only applies to format codes 'r' and 'g'.
use_alt_formatting is nonzero if alternative formatting should be
used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
at most one of use_alt_formatting and add_dot_0_if_integer should
be nonzero.
type, if non-NULL, will be set to one of these constants to identify
the type of the 'd' argument:
Py_DTST_FINITE
Py_DTST_INFINITE
Py_DTST_NAN
Returns a PyMem_Malloc'd block of memory containing the resulting string,
or NULL on error. If NULL is returned, the Python error has been set.
*/
/* Core formatter for the _Py_dg_dtoa-based build: turns the digit string
   produced by _Py_dg_dtoa into the final text (sign, zero padding,
   decimal point, optional exponent).  Returns a PyMem_Malloc'd buffer,
   or NULL with a Python exception set. */
static char *
format_float_short(double d, char format_code,
int mode, Py_ssize_t precision,
int always_add_sign, int add_dot_0_if_integer,
int use_alt_formatting, char **float_strings, int *type)
{
char *buf = NULL;
char *p = NULL;
Py_ssize_t bufsize = 0;
char *digits, *digits_end;
int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
_Py_SET_53BIT_PRECISION_HEADER;
/* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
Must be matched by a call to _Py_dg_freedtoa. */
_Py_SET_53BIT_PRECISION_START;
digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
&digits_end);
_Py_SET_53BIT_PRECISION_END;
decpt = (Py_ssize_t)decpt_as_int;
if (digits == NULL) {
/* The only failure mode is no memory. */
PyErr_NoMemory();
goto exit;
}
assert(digits_end != NULL && digits_end >= digits);
digits_len = digits_end - digits;
if (digits_len && !Py_ISDIGIT(digits[0])) {
/* Infinities and nans here; adapt Gay's output,
so convert Infinity to inf and NaN to nan, and
ignore sign of nan. Then return. */
/* ignore the actual sign of a nan */
if (digits[0] == 'n' || digits[0] == 'N')
sign = 0;
/* We only need 5 bytes to hold the result "+inf\0" . */
bufsize = 5; /* Used later in an assert. */
buf = (char *)PyMem_Malloc(bufsize);
if (buf == NULL) {
PyErr_NoMemory();
goto exit;
}
p = buf;
if (sign == 1) {
*p++ = '-';
}
else if (always_add_sign) {
*p++ = '+';
}
/* strncpy copies exactly the 3 characters and no terminator; the
NUL is written at the exit label */
if (digits[0] == 'i' || digits[0] == 'I') {
strncpy(p, float_strings[OFS_INF], 3);
p += 3;
if (type)
*type = Py_DTST_INFINITE;
}
else if (digits[0] == 'n' || digits[0] == 'N') {
strncpy(p, float_strings[OFS_NAN], 3);
p += 3;
if (type)
*type = Py_DTST_NAN;
}
else {
/* shouldn't get here: Gay's code should always return
something starting with a digit, an 'I', or 'N' */
strncpy(p, "ERR", 3);
p += 3;
assert(0);
}
goto exit;
}
/* The result must be finite (not inf or nan). */
if (type)
*type = Py_DTST_FINITE;
/* We got digits back, format them. We may need to pad 'digits'
either on the left or right (or both) with extra zeros, so in
general the resulting string has the form
[<sign>]<zeros><digits><zeros>[<exponent>]
where either of the <zeros> pieces could be empty, and there's a
decimal point that could appear either in <digits> or in the
leading or trailing <zeros>.
Imagine an infinite 'virtual' string vdigits, consisting of the
string 'digits' (starting at index 0) padded on both the left and
right with infinite strings of zeros. We want to output a slice
vdigits[vdigits_start : vdigits_end]
of this virtual string. Thus if vdigits_start < 0 then we'll end
up producing some leading zeros; if vdigits_end > digits_len there
will be trailing zeros in the output. The next section of code
determines whether to use an exponent or not, figures out the
position 'decpt' of the decimal point, and computes 'vdigits_start'
and 'vdigits_end'. */
vdigits_end = digits_len;
switch (format_code) {
case 'e':
use_exp = 1;
vdigits_end = precision;
break;
case 'f':
vdigits_end = decpt + precision;
break;
case 'g':
if (decpt <= -4 || decpt >
(add_dot_0_if_integer ? precision-1 : precision))
use_exp = 1;
if (use_alt_formatting)
vdigits_end = precision;
break;
case 'r':
/* convert to exponential format at 1e16. We used to convert
at 1e17, but that gives odd-looking results for some values
when a 16-digit 'shortest' repr is padded with bogus zeros.
For example, repr(2e16+8) would give 20000000000000010.0;
the true value is 20000000000000008.0. */
if (decpt <= -4 || decpt > 16)
use_exp = 1;
break;
default:
/* buf is still NULL here, so the function returns NULL */
PyErr_BadInternalCall();
goto exit;
}
/* if using an exponent, reset decimal point position to 1 and adjust
exponent accordingly.*/
if (use_exp) {
exp = decpt - 1;
decpt = 1;
}
/* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
decpt < vdigits_end if add_dot_0_if_integer and no exponent */
vdigits_start = decpt <= 0 ? decpt-1 : 0;
if (!use_exp && add_dot_0_if_integer)
vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
else
vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
/* double check inequalities */
assert(vdigits_start <= 0 &&
0 <= digits_len &&
digits_len <= vdigits_end);
/* decimal point should be in (vdigits_start, vdigits_end] */
assert(vdigits_start < decpt && decpt <= vdigits_end);
/* Compute an upper bound how much memory we need. This might be a few
chars too long, but no big deal. */
bufsize =
/* sign, decimal point and trailing 0 byte */
3 +
/* total digit count (including zero padding on both sides) */
(vdigits_end - vdigits_start) +
/* exponent "e+100", max 3 numerical digits */
(use_exp ? 5 : 0);
/* Now allocate the memory and initialize p to point to the start of
it. */
buf = (char *)PyMem_Malloc(bufsize);
if (buf == NULL) {
PyErr_NoMemory();
goto exit;
}
p = buf;
/* Add a negative sign if negative, and a plus sign if non-negative
and always_add_sign is true. */
if (sign == 1)
*p++ = '-';
else if (always_add_sign)
*p++ = '+';
/* note that exactly one of the three 'if' conditions is true,
so we include exactly one decimal point */
/* Zero padding on left of digit string */
if (decpt <= 0) {
memset(p, '0', decpt-vdigits_start);
p += decpt - vdigits_start;
*p++ = '.';
memset(p, '0', 0-decpt);
p += 0-decpt;
}
else {
memset(p, '0', 0-vdigits_start);
p += 0 - vdigits_start;
}
/* Digits, with included decimal point */
if (0 < decpt && decpt <= digits_len) {
strncpy(p, digits, decpt-0);
p += decpt-0;
*p++ = '.';
strncpy(p, digits+decpt, digits_len-decpt);
p += digits_len-decpt;
}
else {
strncpy(p, digits, digits_len);
p += digits_len;
}
/* And zeros on the right */
if (digits_len < decpt) {
memset(p, '0', decpt-digits_len);
p += decpt-digits_len;
*p++ = '.';
memset(p, '0', vdigits_end-decpt);
p += vdigits_end-decpt;
}
else {
memset(p, '0', vdigits_end-digits_len);
p += vdigits_end-digits_len;
}
/* Delete a trailing decimal pt unless using alternative formatting. */
if (p[-1] == '.' && !use_alt_formatting)
p--;
/* Now that we've done zero padding, add an exponent if needed. */
if (use_exp) {
*p++ = float_strings[OFS_E][0];
/* always a sign, and at least two exponent digits */
exp_len = sprintf(p, "%+.02d", exp);
p += exp_len;
}
/* Common exit: terminate the string if one was built, and release the
digit string obtained from _Py_dg_dtoa. */
exit:
if (buf) {
*p = '\0';
/* It's too late if this fails, as we've already stepped on
memory that isn't ours. But it's an okay debugging test. */
assert(p-buf < bufsize);
}
if (digits)
_Py_dg_freedtoa(digits);
return buf;
}
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         char format_code,
                                         int precision,
                                         int flags,
                                         int *type)
{
    /* Validate format_code, pick the _Py_dg_dtoa mode that goes with
       it, adjust the precision, and delegate to format_float_short.
       Returns NULL with a Python exception set for an unknown
       format_code, or for 'r' with a non-zero precision. */
    char **float_strings = lc_float_strings;
    int mode;

    /* Upper-case codes differ from their lower-case forms only in the
       table used for "inf", "nan" and the exponent marker. */
    if (format_code == 'E' || format_code == 'F' || format_code == 'G') {
        float_strings = uc_float_strings;
        format_code = (format_code == 'E') ? 'e'
                    : (format_code == 'F') ? 'f'
                    : 'g';
    }

    if (format_code == 'e') {
        /* exponent: one digit before the point plus 'precision' after */
        mode = 2;
        precision++;
    }
    else if (format_code == 'f') {
        /* fixed */
        mode = 3;
    }
    else if (format_code == 'g') {
        /* general; precision 0 makes no sense here, interpret as 1 */
        mode = 2;
        if (precision == 0) {
            precision = 1;
        }
    }
    else if (format_code == 'r') {
        /* repr: the supplied precision is unused and must be 0 */
        mode = 0;
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
    }
    else {
        PyErr_BadInternalCall();
        return NULL;
    }

    return format_float_short(val, format_code, mode, precision,
                              flags & Py_DTSF_SIGN,
                              flags & Py_DTSF_ADD_DOT_0,
                              flags & Py_DTSF_ALT,
                              float_strings, type);
}
#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */
......@@ -18,10 +18,10 @@ add_custom_target(copy_stdlib ALL DEPENDS ${STDLIB_TARGETS})
file(GLOB_RECURSE STDMODULE_SRCS 2.7_Modules errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c)
# compile specified files in lib_python/2.7_Objects
file(GLOB_RECURSE STDOBJECT_SRCS 2.7_Objects structseq.c capsule.c)
file(GLOB_RECURSE STDOBJECT_SRCS 2.7_Objects structseq.c capsule.c stringobject.c)
# compile specified files in lib_python/2.7_Python
file(GLOB_RECURSE STDPYTHON_SRCS 2.7_Python getargs.c pyctype.c)
file(GLOB_RECURSE STDPYTHON_SRCS 2.7_Python getargs.c pyctype.c formatter_string.c pystrtod.c dtoa.c)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-missing-field-initializers -Wno-tautological-compare -Wno-type-limits")
add_library(FROM_CPYTHON OBJECT ${STDMODULE_SRCS} ${STDOBJECT_SRCS} ${STDPYTHON_SRCS})
// Copyright (c) 2014 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdarg.h>
#include <string.h>
#include "Python.h"
#include "capi/types.h"
#include "core/threading.h"
#include "core/types.h"
#include "runtime/import.h"
#include "runtime/objmodel.h"
#include "runtime/types.h"
namespace pyston {
// Placeholder for the C-API entry point of the same name: it is not
// implemented here.  Any call goes straight to
// Py_FatalError("unimplemented"); `callable` and the variadic arguments
// are never read.
extern "C" PyObject* PyObject_CallFunctionObjArgs(PyObject* callable, ...) {
Py_FatalError("unimplemented");
}
}
// Copyright (c) 2014 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdarg.h>
#include <string.h>
#include "Python.h"
#include "capi/types.h"
#include "core/threading.h"
#include "core/types.h"
#include "runtime/import.h"
#include "runtime/objmodel.h"
#include "runtime/types.h"
namespace pyston {
// C-API entry point PyObject_Unicode(v).
// Not implemented yet: the body only calls Py_FatalError("unimplemented"),
// so `v` is never inspected.
extern "C" PyObject* PyObject_Unicode(PyObject* v) {
Py_FatalError("unimplemented");
}
// Produce the str() form of `v` for C-API callers.
//  - NULL input yields the constant string "<NULL>".
//  - An object whose class is exactly str_cls is returned unchanged.
//  - Otherwise the runtime's str() is invoked; a Box* thrown by it is recorded
//    through PyErr_SetObject and NULL is returned instead of propagating.
extern "C" PyObject* _PyObject_Str(PyObject* v) {
    if (!v)
        return boxStrConstant("<NULL>");

    if (str_cls == v->cls)
        return v;

    PyObject* converted = NULL;
    try {
        converted = str(v);
    } catch (Box* exc) {
        // Translate the thrown exception object into the C-API error state.
        PyErr_SetObject(exc->cls, exc);
        converted = NULL;
    }
    return converted;
}
// str() for C-API callers, guaranteed (by assertion) to hand back a byte string.
// Delegates to _PyObject_Str; when unicode support is compiled in and that
// returned a unicode object, it is encoded with the default encoding/error
// handling (both passed as NULL). Returns NULL if either step fails.
extern "C" PyObject* PyObject_Str(PyObject* v) {
    PyObject* text = _PyObject_Str(v);
    if (!text)
        return NULL;

#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(text)) {
        PyObject* encoded = PyUnicode_AsEncodedString(text, NULL, NULL);
        // The unicode intermediate is released whether or not encoding worked.
        Py_DECREF(text);
        if (!encoded)
            return NULL;
        text = encoded;
    }
#endif

    assert(PyString_Check(text));
    return text;
}
}
......@@ -289,6 +289,11 @@ static PyObject* lookup_maybe(PyObject* self, const char* attrstr, PyObject** at
return obj;
}
// Exported wrapper around the file-local lookup_maybe(): looks up the special
// attribute named `attrstr` on `self`, forwarding `attrobj` unchanged.
// Must not be called on classic-class instances (asserted).
extern "C" PyObject* _PyObject_LookupSpecial(PyObject* self, const char* attrstr, PyObject** attrobj) {
    assert(!PyInstance_Check(self));

    PyObject* found = lookup_maybe(self, attrstr, attrobj);
    return found;
}
static PyObject* lookup_method(PyObject* self, const char* attrstr, PyObject** attrobj) noexcept {
PyObject* res = lookup_maybe(self, attrstr, attrobj);
if (res == NULL && !PyErr_Occurred())
......
......@@ -1366,9 +1366,8 @@ public:
return NULL;
BoxedFunction* rtattr_func = static_cast<BoxedFunction*>(rtattr);
RELEASE_ASSERT(!argspec.has_starargs, "");
RELEASE_ASSERT(!argspec.has_kwargs, "");
RELEASE_ASSERT(argspec.num_keywords == 0, "");
if (argspec.num_keywords || argspec.has_starargs || argspec.has_kwargs)
return NULL;
CLFunction* cl = rtattr_func->f;
assert(cl);
......
......@@ -492,7 +492,7 @@ BoxedClass* BaseException, *Exception, *StandardError, *AssertionError, *Attribu
*NameError, *KeyError, *IndexError, *IOError, *OSError, *ZeroDivisionError, *ValueError, *UnboundLocalError,
*RuntimeError, *ImportError, *StopIteration, *Warning, *SyntaxError, *OverflowError, *DeprecationWarning,
*MemoryError, *LookupError, *EnvironmentError, *ArithmeticError, *BufferError, *KeyboardInterrupt, *SystemExit,
*SystemError, *NotImplementedError;
*SystemError, *NotImplementedError, *PendingDeprecationWarning;
}
Box* exceptionNew1(BoxedClass* cls) {
......@@ -815,7 +815,6 @@ void setupBuiltins() {
SyntaxError = makeBuiltinException(StandardError, "SyntaxError");
OverflowError = makeBuiltinException(ArithmeticError, "OverflowError");
/*ImportWarning =*/makeBuiltinException(Warning, "ImportWarning");
/*PendingDeprecationWarning =*/makeBuiltinException(Warning, "PendingDeprecationWarning");
DeprecationWarning = makeBuiltinException(Warning, "DeprecationWarning");
/*BytesWarning =*/makeBuiltinException(Warning, "BytesWarning");
MemoryError = makeBuiltinException(StandardError, "MemoryError");
......@@ -824,6 +823,7 @@ void setupBuiltins() {
SystemExit = makeBuiltinException(BaseException, "SystemExit");
SystemError = makeBuiltinException(StandardError, "SystemError");
NotImplementedError = makeBuiltinException(RuntimeError, "NotImplementedError");
PendingDeprecationWarning = makeBuiltinException(Warning, "PendingDeprecationWarning");
EnvironmentError->giveAttr(
"__init__",
......
......@@ -262,6 +262,152 @@ extern "C" PyObject* PyObject_Repr(PyObject* obj) {
}
}
// Implements format(obj, format_spec) for the C API (ported from CPython 2.7).
//
//  obj          object to format.
//  format_spec  str or unicode format specification; NULL means "use an empty string".
//
// Returns the formatted str/unicode object, or NULL with an error set when:
//  - the empty default spec cannot be allocated,
//  - format_spec is neither str nor unicode (TypeError),
//  - no __format__ can be found, or calling it fails,
//  - __format__ returns something other than str/unicode (TypeError).
// When the spec is unicode and the result is a str, the result is converted to unicode.
extern "C" PyObject* PyObject_Format(PyObject* obj, PyObject* format_spec) {
    PyObject* empty = NULL;
    PyObject* result = NULL;
#ifdef Py_USING_UNICODE
    int spec_is_unicode;
    int result_is_unicode;
#endif

    /* If no format_spec is provided, use an empty string */
    if (format_spec == NULL) {
        empty = PyString_FromStringAndSize(NULL, 0);
        /* Creating the empty string failed: bail out instead of handing a
           NULL format_spec to the type checks below. */
        if (empty == NULL)
            goto done;
        format_spec = empty;
    }

/* Check the format_spec type, and make sure it's str or unicode */
#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(format_spec))
        spec_is_unicode = 1;
    else if (PyString_Check(format_spec))
        spec_is_unicode = 0;
    else {
#else
    if (!PyString_Check(format_spec)) {
#endif
        PyErr_Format(PyExc_TypeError, "format expects arg 2 to be string "
                                      "or unicode, not %.100s",
                     Py_TYPE(format_spec)->tp_name);
        goto done;
    }

    /* Check for a __format__ method and call it. */
    if (PyInstance_Check(obj)) {
        /* We're an instance of a classic class */
        PyObject* bound_method = PyObject_GetAttrString(obj, "__format__");
        if (bound_method != NULL) {
            result = PyObject_CallFunctionObjArgs(bound_method, format_spec, NULL);
            Py_DECREF(bound_method);
        } else {
            PyObject* self_as_str = NULL;
            PyObject* format_method = NULL;
            Py_ssize_t format_len;

            /* The attribute lookup failed; discard that error and fall back
               to formatting the str/unicode conversion of obj. */
            PyErr_Clear();
/* Per the PEP, convert to str (or unicode,
   depending on the type of the format
   specifier).  For new-style classes, this
   logic is done by object.__format__(). */
#ifdef Py_USING_UNICODE
            if (spec_is_unicode) {
                format_len = PyUnicode_GET_SIZE(format_spec);
                self_as_str = PyObject_Unicode(obj);
            } else
#endif
            {
                format_len = PyString_GET_SIZE(format_spec);
                self_as_str = PyObject_Str(obj);
            }
            if (self_as_str == NULL)
                goto done1;

            if (format_len > 0) {
                /* See the almost identical code in
                   typeobject.c for new-style
                   classes. */
                if (PyErr_WarnEx(PyExc_PendingDeprecationWarning, "object.__format__ with a non-empty "
                                                                  "format string is deprecated",
                                 1) < 0) {
                    goto done1;
                }
                /* Eventually this will become an
                   error:
                PyErr_Format(PyExc_TypeError,
                   "non-empty format string passed to "
                   "object.__format__");
                goto done1;
                */
            }

            /* Then call str.__format__ on that result */
            format_method = PyObject_GetAttrString(self_as_str, "__format__");
            if (format_method == NULL) {
                goto done1;
            }
            result = PyObject_CallFunctionObjArgs(format_method, format_spec, NULL);
        done1:
            /* Local cleanup for the fallback path; both may still be NULL here. */
            Py_XDECREF(self_as_str);
            Py_XDECREF(format_method);
            if (result == NULL)
                goto done;
        }
    } else {
        /* Not an instance of a classic class, use the code
           from py3k */
        static PyObject* format_cache = NULL;

        /* Find the (unbound!) __format__ method; the reference obtained
           here is released with Py_DECREF after the call below. */
        PyObject* method = _PyObject_LookupSpecial(obj, "__format__", &format_cache);
        if (method == NULL) {
            if (!PyErr_Occurred())
                PyErr_Format(PyExc_TypeError, "Type %.100s doesn't define __format__", Py_TYPE(obj)->tp_name);
            goto done;
        }
        /* And call it. */
        result = PyObject_CallFunctionObjArgs(method, format_spec, NULL);
        Py_DECREF(method);
    }

    if (result == NULL)
        goto done;

/* Check the result type, and make sure it's str or unicode */
#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(result))
        result_is_unicode = 1;
    else if (PyString_Check(result))
        result_is_unicode = 0;
    else {
#else
    if (!PyString_Check(result)) {
#endif
        PyErr_Format(PyExc_TypeError, "%.100s.__format__ must return string or "
                                      "unicode, not %.100s",
                     Py_TYPE(obj)->tp_name, Py_TYPE(result)->tp_name);
        Py_DECREF(result);
        result = NULL;
        goto done;
    }

/* Convert to unicode, if needed.  Required if spec is unicode
   and result is str */
#ifdef Py_USING_UNICODE
    if (spec_is_unicode && !result_is_unicode) {
        PyObject* tmp = PyObject_Unicode(result);
        /* This logic works whether or not tmp is NULL */
        Py_DECREF(result);
        result = tmp;
    }
#endif

done:
    /* `empty` is only non-NULL when this function created the default spec. */
    Py_XDECREF(empty);
    return result;
}
extern "C" PyObject* PyObject_GetAttr(PyObject* o, PyObject* attr_name) {
if (!isSubclass(attr_name->cls, str_cls)) {
PyErr_Format(PyExc_TypeError, "attribute name must be string, not '%.200s'", Py_TYPE(attr_name)->tp_name);
......
......@@ -23,7 +23,9 @@
namespace pyston {
extern "C" {
BoxedClass* classobj_cls, *instance_cls;
}
bool classobjIssubclass(BoxedClassobj* child, BoxedClassobj* parent) {
if (child == parent)
......
......@@ -24,7 +24,9 @@ void setupClassobj();
class BoxedClass;
class BoxedClassobj;
class BoxedInstance;
extern "C" {
extern BoxedClass* classobj_cls, *instance_cls;
}
bool instanceIsinstance(BoxedInstance* obj, BoxedClassobj* cls);
bool classobjIssubclass(BoxedClassobj* child, BoxedClassobj* parent);
......
......@@ -34,6 +34,22 @@ extern "C" Py_complex PyComplex_AsCComplex(PyObject* op) {
Py_FatalError("unimplemented");
}
// Real component of `op` as a C double.
// Complex objects expose their stored `real` field; anything else is
// delegated to PyFloat_AsDouble.
extern "C" double PyComplex_RealAsDouble(PyObject* op) {
    if (!PyComplex_Check(op))
        return PyFloat_AsDouble(op);

    return static_cast<BoxedComplex*>(op)->real;
}
// Imaginary component of `op` as a C double.
// Complex objects expose their stored `imag` field; every other object
// yields 0.0.
extern "C" double PyComplex_ImagAsDouble(PyObject* op) {
    if (!PyComplex_Check(op))
        return 0.0;

    return static_cast<BoxedComplex*>(op)->imag;
}
// addition
extern "C" Box* complexAddComplex(BoxedComplex* lhs, BoxedComplex* rhs) {
......
......@@ -58,6 +58,10 @@ extern "C" PyObject* PyInt_FromLong(long n) {
return boxInt(n);
}
// C-API entry point _PyInt_Format(v, base, newstyle).
// Not implemented yet: the body only calls Py_FatalError("unimplemented"),
// so none of the parameters are read.
extern "C" PyAPI_FUNC(PyObject*) _PyInt_Format(PyIntObject* v, int base, int newstyle) {
Py_FatalError("unimplemented");
}
BoxedInt* interned_ints[NUM_INTERNED_INTS];
// If we don't have fast overflow-checking builtins, provide some slow variants:
......
......@@ -113,6 +113,10 @@ extern "C" double PyLong_AsDouble(PyObject* vv) {
return mpz_get_d(l->n);
}
// C-API entry point _PyLong_Format(aa, base, addL, newstyle).
// Not implemented yet: the body only calls Py_FatalError("unimplemented"),
// so none of the parameters are read.
extern "C" PyAPI_FUNC(PyObject*) _PyLong_Format(PyObject* aa, int base, int addL, int newstyle) {
Py_FatalError("unimplemented");
}
// C-API entry point PyLong_FromDouble(v).
// Not implemented yet: the body only calls Py_FatalError("unimplemented"),
// so `v` is never converted.
extern "C" PyObject* PyLong_FromDouble(double v) {
Py_FatalError("unimplemented");
}
......@@ -129,6 +133,23 @@ extern "C" PyObject* PyLong_FromUnsignedLong(unsigned long ival) {
return rtn;
}
#define IS_LITTLE_ENDIAN (int)*(unsigned char*)&one
#define PY_ABS_LLONG_MIN (0 - (unsigned PY_LONG_LONG)PY_LLONG_MIN)
// Build a long object from a signed Py_ssize_t value.
// The value is copied into a local and its raw bytes are handed to
// _PyLong_FromByteArray with SIZEOF_SIZE_T as the byte count and 1 as the
// final (signed) argument.
extern "C" PyObject* PyLong_FromSsize_t(Py_ssize_t ival) {
    // IS_LITTLE_ENDIAN expands to a read of the first byte of a local named `one`.
    int one = 1;
    Py_ssize_t raw = ival;
    unsigned char* raw_bytes = (unsigned char*)&raw;

    return _PyLong_FromByteArray(raw_bytes, SIZEOF_SIZE_T, IS_LITTLE_ENDIAN, 1);
}
// Build a long object from an unsigned size_t value.
// The value is copied into a local and its raw bytes are handed to
// _PyLong_FromByteArray with SIZEOF_SIZE_T as the byte count and 0 as the
// final (unsigned) argument.
extern "C" PyObject* PyLong_FromSize_t(size_t ival) {
    // IS_LITTLE_ENDIAN expands to a read of the first byte of a local named `one`.
    int one = 1;
    size_t raw = ival;
    unsigned char* raw_bytes = (unsigned char*)&raw;

    return _PyLong_FromByteArray(raw_bytes, SIZEOF_SIZE_T, IS_LITTLE_ENDIAN, 0);
}
#undef IS_LITTLE_ENDIAN
// C-API entry point _PyLong_Frexp(a, e).
// Not implemented yet: the body only calls Py_FatalError("unimplemented"),
// so `a` is never read and `*e` is never written.
extern "C" double _PyLong_Frexp(PyLongObject* a, Py_ssize_t* e) {
Py_FatalError("unimplemented");
}
......
......@@ -24,6 +24,7 @@
#include "core/types.h"
#include "core/util.h"
#include "gc/collector.h"
#include "runtime/capi.h"
#include "runtime/dict.h"
#include "runtime/objmodel.h"
#include "runtime/types.h"
......@@ -650,6 +651,18 @@ Box* strPartition(BoxedString* self, BoxedString* sep) {
self->s.size() - found_idx - sep->s.size()) });
}
extern "C" PyObject* do_string_format(PyObject* self, PyObject* args, PyObject* kwargs);
// Runtime implementation of str.format(*args, **kwargs).
// Hands the receiver, positional tuple and keyword dict to do_string_format
// (declared extern "C" just above), then calls checkAndThrowCAPIException()
// -- presumably to re-raise any error that call left pending; confirm against
// runtime/capi.h -- and finally asserts that a result was produced.
Box* strFormat(BoxedString* self, BoxedTuple* args, BoxedDict* kwargs) {
    assert(tuple_cls == args->cls);
    assert(dict_cls == kwargs->cls);

    Box* formatted = do_string_format(self, args, kwargs);
    checkAndThrowCAPIException();

    assert(formatted);
    return formatted;
}
Box* strSplit(BoxedString* self, BoxedString* sep, BoxedInt* _max_split) {
assert(self->cls == str_cls);
if (_max_split->cls != int_cls)
......@@ -1039,7 +1052,13 @@ extern "C" Py_ssize_t PyString_Size(PyObject* s) {
}
// Resize the string object referenced by *pv, in place, to `newsize` bytes.
//
//  pv       non-NULL pointer to a str_cls object; the pointed-to object is
//           modified directly and *pv itself is left unchanged.
//  newsize  new length in bytes; must be non-negative. Bytes added by growing
//           the string are filled with '\0'.
//
// Always returns 0.
extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) {
    // This is only allowed to be called when there is only one user of the string (ie a refcount of 1 in CPython)
    assert(pv);
    assert((*pv)->cls == str_cls);
    // A negative size would be converted to a huge unsigned length by resize().
    assert(newsize >= 0);

    BoxedString* s = static_cast<BoxedString*>(*pv);
    s->s.resize(newsize, '\0');
    return 0;
}
static Py_ssize_t string_buffer_getreadbuf(PyObject* self, Py_ssize_t index, const void** ptr) {
......@@ -1120,6 +1139,8 @@ void setupStr() {
str_cls->giveAttr("partition", new BoxedFunction(boxRTFunction((void*)strPartition, UNKNOWN, 2)));
str_cls->giveAttr("format", new BoxedFunction(boxRTFunction((void*)strFormat, UNKNOWN, 1, 0, true, true)));
str_cls->giveAttr("__add__", new BoxedFunction(boxRTFunction((void*)strAdd, UNKNOWN, 2)));
str_cls->giveAttr("__mod__", new BoxedFunction(boxRTFunction((void*)strMod, STR, 2)));
str_cls->giveAttr("__mul__", new BoxedFunction(boxRTFunction((void*)strMul, UNKNOWN, 2)));
......
......@@ -78,3 +78,5 @@ print "hello world".partition("hello")
print "hello world".partition("o")
print "hello world"[False:True:True]
print "{hello}".format(hello="world")
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment