Commit e56c9410 authored by Kevin Modzelewski

Merge commit 'pr/424'

Conflicts:
	CMakeLists.txt
	tools/tester.py
parents 1512a30c 5cad330e
...@@ -198,6 +198,7 @@ add_custom_target(astcompare COMMAND ${CMAKE_SOURCE_DIR}/tools/astprint_test.sh ...@@ -198,6 +198,7 @@ add_custom_target(astcompare COMMAND ${CMAKE_SOURCE_DIR}/tools/astprint_test.sh
# test # test
enable_testing() enable_testing()
set(TEST_THREADS 1 CACHE STRING "number of pyston test threads") set(TEST_THREADS 1 CACHE STRING "number of pyston test threads")
set(PYTHONIOENCODING utf-8)
add_test(NAME lint COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/lint.py WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src) add_test(NAME lint COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/lint.py WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src)
add_test(NAME check-format COMMAND ${CMAKE_SOURCE_DIR}/tools/check_format.sh ${LLVM_TOOLS_BINARY_DIR}/clang-format WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src) add_test(NAME check-format COMMAND ${CMAKE_SOURCE_DIR}/tools/check_format.sh ${LLVM_TOOLS_BINARY_DIR}/clang-format WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src)
......
...@@ -911,7 +911,9 @@ Value ASTInterpreter::visit_print(AST_Print* node) { ...@@ -911,7 +911,9 @@ Value ASTInterpreter::visit_print(AST_Print* node) {
if (softspace(dest, new_softspace)) { if (softspace(dest, new_softspace)) {
callattrInternal(dest, &write_str, CLASS_OR_INST, 0, ArgPassSpec(1), boxString(space_str), 0, 0, 0, 0); callattrInternal(dest, &write_str, CLASS_OR_INST, 0, ArgPassSpec(1), boxString(space_str), 0, 0, 0, 0);
} }
callattrInternal(dest, &write_str, CLASS_OR_INST, 0, ArgPassSpec(1), str(var), 0, 0, 0, 0);
Box* str_or_unicode_var = (var->cls == unicode_cls) ? var : str(var);
callattrInternal(dest, &write_str, CLASS_OR_INST, 0, ArgPassSpec(1), str_or_unicode_var, 0, 0, 0, 0);
} }
if (node->nl) { if (node->nl) {
......
...@@ -1845,7 +1845,7 @@ private: ...@@ -1845,7 +1845,7 @@ private:
// end code for handling of softspace // end code for handling of softspace
llvm::Value* v = emitter.createCall(unw_info, g.funcs.str, converted->getValue()); llvm::Value* v = emitter.createCall(unw_info, g.funcs.strOrUnicode, converted->getValue());
v = emitter.getBuilder()->CreateBitCast(v, g.llvm_value_type_ptr); v = emitter.getBuilder()->CreateBitCast(v, g.llvm_value_type_ptr);
auto s = new ConcreteCompilerVariable(STR, v, true); auto s = new ConcreteCompilerVariable(STR, v, true);
r = dest->callattr(emitter, getOpInfoForNode(node, unw_info), &write_str, flags, ArgPassSpec(1), { s }, r = dest->callattr(emitter, getOpInfoForNode(node, unw_info), &write_str, flags, ArgPassSpec(1), { s },
......
...@@ -210,6 +210,7 @@ void initGlobalFuncs(GlobalState& g) { ...@@ -210,6 +210,7 @@ void initGlobalFuncs(GlobalState& g) {
GET(importStar); GET(importStar);
GET(repr); GET(repr);
GET(str); GET(str);
GET(strOrUnicode);
GET(exceptionMatches); GET(exceptionMatches);
GET(yield); GET(yield);
GET(getiterHelper); GET(getiterHelper);
......
...@@ -38,7 +38,7 @@ struct GlobalFuncs { ...@@ -38,7 +38,7 @@ struct GlobalFuncs {
*decodeUTF8StringPtr; *decodeUTF8StringPtr;
llvm::Value* getattr, *setattr, *delattr, *delitem, *delGlobal, *nonzero, *binop, *compare, *augbinop, *unboxedLen, llvm::Value* getattr, *setattr, *delattr, *delitem, *delGlobal, *nonzero, *binop, *compare, *augbinop, *unboxedLen,
*getitem, *getclsattr, *getGlobal, *setitem, *unaryop, *import, *importFrom, *importStar, *repr, *str, *getitem, *getclsattr, *getGlobal, *setitem, *unaryop, *import, *importFrom, *importStar, *repr, *str,
*exceptionMatches, *yield, *getiterHelper, *hasnext; *strOrUnicode, *exceptionMatches, *yield, *getiterHelper, *hasnext;
llvm::Value* unpackIntoArray, *raiseAttributeError, *raiseAttributeErrorStr, *raiseNotIterableError, llvm::Value* unpackIntoArray, *raiseAttributeError, *raiseAttributeErrorStr, *raiseNotIterableError,
*raiseIndexErrorStr, *assertNameDefined, *assertFail, *assertFailDerefNameDefined; *raiseIndexErrorStr, *assertNameDefined, *assertFail, *assertFailDerefNameDefined;
......
...@@ -50,6 +50,8 @@ ...@@ -50,6 +50,8 @@
namespace pyston { namespace pyston {
extern void setEncodingAndErrors();
// returns true iff we got a request to exit, i.e. SystemExit, placing the // returns true iff we got a request to exit, i.e. SystemExit, placing the
// return code in `*retcode`. does not touch `*retcode` if it returns false. // return code in `*retcode`. does not touch `*retcode` if it returns false.
static bool handle_toplevel_exn(const ExcInfo& e, int* retcode) { static bool handle_toplevel_exn(const ExcInfo& e, int* retcode) {
...@@ -186,6 +188,11 @@ static int main(int argc, char** argv) { ...@@ -186,6 +188,11 @@ static int main(int argc, char** argv) {
} }
} }
// Set encoding for standard streams. This needs to be done after
// sys.path is properly set up, so that we can import the
// encodings module.
setEncodingAndErrors();
// end of argument parsing // end of argument parsing
_t.split("to run"); _t.split("to run");
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <langinfo.h>
#include <sstream> #include <sstream>
#include "llvm/Support/FileSystem.h" #include "llvm/Support/FileSystem.h"
...@@ -247,6 +248,119 @@ static bool isLittleEndian() { ...@@ -247,6 +248,119 @@ static bool isLittleEndian() {
return s[0] != 0; return s[0] != 0;
} }
void setEncodingAndErrors() {
// Adapted from pythonrun.c in CPython, with modifications for Pyston.
char* p;
char* icodeset = nullptr;
char* codeset = nullptr;
char* errors = nullptr;
int free_codeset = 0;
int overridden = 0;
PyObject* sys_stream, *sys_isatty;
char* saved_locale, *loc_codeset;
if ((p = Py_GETENV("PYTHONIOENCODING")) && *p != '\0') {
p = icodeset = codeset = strdup(p);
free_codeset = 1;
errors = strchr(p, ':');
if (errors) {
*errors = '\0';
errors++;
}
overridden = 1;
}
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
/* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of
stdin and stdout if these are terminals, unless overridden. */
if (!overridden || !Py_FileSystemDefaultEncoding) {
saved_locale = strdup(setlocale(LC_CTYPE, NULL));
setlocale(LC_CTYPE, "");
loc_codeset = nl_langinfo(CODESET);
if (loc_codeset && *loc_codeset) {
PyObject* enc = PyCodec_Encoder(loc_codeset);
if (enc) {
loc_codeset = strdup(loc_codeset);
Py_DECREF(enc);
} else {
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
PyErr_Clear();
loc_codeset = NULL;
} else {
PyErr_Print();
exit(1);
}
}
} else
loc_codeset = NULL;
setlocale(LC_CTYPE, saved_locale);
free(saved_locale);
if (!overridden) {
codeset = icodeset = loc_codeset;
free_codeset = 1;
}
/* Initialize Py_FileSystemDefaultEncoding from
locale even if PYTHONIOENCODING is set. */
if (!Py_FileSystemDefaultEncoding) {
Py_FileSystemDefaultEncoding = loc_codeset;
if (!overridden)
free_codeset = 0;
}
}
#endif
#ifdef MS_WINDOWS
if (!overridden) {
icodeset = ibuf;
codeset = buf;
sprintf(ibuf, "cp%d", GetConsoleCP());
sprintf(buf, "cp%d", GetConsoleOutputCP());
}
#endif
if (codeset) {
sys_stream = PySys_GetObject("stdin");
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
if ((overridden || (sys_isatty && PyObject_IsTrue(sys_isatty))) && PyFile_Check(sys_stream)) {
if (!PyFile_SetEncodingAndErrors(sys_stream, icodeset, errors))
Py_FatalError("Cannot set codeset of stdin");
}
Py_XDECREF(sys_isatty);
sys_stream = PySys_GetObject("stdout");
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
if ((overridden || (sys_isatty && PyObject_IsTrue(sys_isatty))) && PyFile_Check(sys_stream)) {
if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stdout");
}
Py_XDECREF(sys_isatty);
sys_stream = PySys_GetObject("stderr");
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty)
PyErr_Clear();
if ((overridden || (sys_isatty && PyObject_IsTrue(sys_isatty))) && PyFile_Check(sys_stream)) {
if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stderr");
}
Py_XDECREF(sys_isatty);
if (free_codeset)
free(codeset);
}
}
extern "C" const char* Py_GetPlatform() noexcept { extern "C" const char* Py_GetPlatform() noexcept {
// cpython does this check in their configure script // cpython does this check in their configure script
#if defined(__linux__) #if defined(__linux__)
......
...@@ -1153,6 +1153,38 @@ extern "C" void PyFile_SetBufSize(PyObject* f, int bufsize) noexcept { ...@@ -1153,6 +1153,38 @@ extern "C" void PyFile_SetBufSize(PyObject* f, int bufsize) noexcept {
} }
} }
/* Set the encoding used to output Unicode strings, leaving the error
   handler unspecified (NULL is forwarded as the `errors` argument).
   Return 1 on success, 0 on failure. */
extern "C" int PyFile_SetEncoding(PyObject* f, const char* enc) noexcept {
    char* const no_error_handler = NULL;
    return PyFile_SetEncodingAndErrors(f, enc, no_error_handler);
}
/* Store a new encoding and error-handler on the file object `f`.
   `enc` is converted to a string object; `errors` is converted likewise, or
   recorded as None when it is NULL. The file's previous f_encoding and
   f_errors references are released only after both new objects exist.
   Return 1 on success, 0 if either string object could not be created. */
extern "C" int PyFile_SetEncodingAndErrors(PyObject* f, const char* enc, char* errors) noexcept {
    assert(PyFile_Check(f));
    BoxedFile* target = static_cast<BoxedFile*>(f);

    PyObject* new_encoding = PyString_FromString(enc);
    if (new_encoding == NULL)
        return 0;

    PyObject* new_errors;
    if (errors == NULL) {
        Py_INCREF(Py_None);
        new_errors = Py_None;
    } else {
        new_errors = PyString_FromString(errors);
        if (new_errors == NULL) {
            // Do not keep the encoding object alive if the pair cannot be completed.
            Py_DECREF(new_encoding);
            return 0;
        }
    }

    Py_DECREF(target->f_encoding);
    target->f_encoding = new_encoding;
    Py_DECREF(target->f_errors);
    target->f_errors = new_errors;
    return 1;
}
extern "C" int _PyFile_SanitizeMode(char* mode) noexcept { extern "C" int _PyFile_SanitizeMode(char* mode) noexcept {
char* upos; char* upos;
size_t len = strlen(mode); size_t len = strlen(mode);
......
...@@ -102,6 +102,7 @@ void force() { ...@@ -102,6 +102,7 @@ void force() {
FORCE(assertFailDerefNameDefined); FORCE(assertFailDerefNameDefined);
FORCE(assertFail); FORCE(assertFail);
FORCE(strOrUnicode);
FORCE(printFloat); FORCE(printFloat);
FORCE(listAppendInternal); FORCE(listAppendInternal);
FORCE(getSysStdout); FORCE(getSysStdout);
......
...@@ -2004,6 +2004,14 @@ extern "C" BoxedString* str(Box* obj) { ...@@ -2004,6 +2004,14 @@ extern "C" BoxedString* str(Box* obj) {
return static_cast<BoxedString*>(obj); return static_cast<BoxedString*>(obj);
} }
// Variant of str() that passes unicode objects through untouched; any other
// object is converted with str().
extern "C" Box* strOrUnicode(Box* obj) {
    const bool already_unicode = (obj->cls == unicode_cls);
    return already_unicode ? obj : str(obj);
}
extern "C" BoxedString* repr(Box* obj) { extern "C" BoxedString* repr(Box* obj) {
static StatCounter slowpath_repr("slowpath_repr"); static StatCounter slowpath_repr("slowpath_repr");
slowpath_repr.log(); slowpath_repr.log();
......
...@@ -62,6 +62,7 @@ extern "C" BoxedString* str(Box* obj); ...@@ -62,6 +62,7 @@ extern "C" BoxedString* str(Box* obj);
extern "C" BoxedString* repr(Box* obj); extern "C" BoxedString* repr(Box* obj);
extern "C" BoxedString* reprOrNull(Box* obj); // similar to repr, but returns NULL on exception extern "C" BoxedString* reprOrNull(Box* obj); // similar to repr, but returns NULL on exception
extern "C" BoxedString* strOrNull(Box* obj); // similar to str, but returns NULL on exception extern "C" BoxedString* strOrNull(Box* obj); // similar to str, but returns NULL on exception
extern "C" Box* strOrUnicode(Box* obj);
extern "C" bool exceptionMatches(Box* obj, Box* cls); extern "C" bool exceptionMatches(Box* obj, Box* cls);
extern "C" BoxedInt* hash(Box* obj); extern "C" BoxedInt* hash(Box* obj);
extern "C" Box* abs_(Box* obj); extern "C" Box* abs_(Box* obj);
......
# run_args: -n # run_args: -n
# statcheck: noninit_count("slowpath_runtimecall") <= 5 # statcheck: noninit_count("slowpath_runtimecall") <= 10
# statcheck: stats.get("slowpath_callclfunc", 0) <= 5 # statcheck: stats.get("slowpath_callclfunc", 0) <= 5
# Simple patchpoint test: # Simple patchpoint test:
......
print repr(unicode()) print repr(unicode())
print repr(unicode('hello world')) print repr(unicode('hello world'))
print unicode('hello world')
# Some random unicode character: # Some random unicode character:
u = u'\u0180' u = u'\u0180'
print len(u) print len(u)
print repr(u) print repr(u)
print repr(u.encode("utf8")) print repr(u.encode("utf8"))
print u
# This is tricky, since we need to support file encodings, and then set stdout to UTF8:
# print u
d = {} d = {}
d["hello world"] = "hi" d["hello world"] = "hi"
...@@ -38,6 +37,7 @@ print p(s.encode("utf16")) ...@@ -38,6 +37,7 @@ print p(s.encode("utf16"))
print p(s.encode("utf32")) print p(s.encode("utf32"))
print p(s.encode("iso_8859_15")) print p(s.encode("iso_8859_15"))
print p(s.encode(u"utf8")) print p(s.encode(u"utf8"))
print s
print p("hello world".encode(u"utf8")) print p("hello world".encode(u"utf8"))
print repr(u' '.join(["hello", "world"])) print repr(u' '.join(["hello", "world"]))
...@@ -93,6 +93,7 @@ print "hello world".startswith(u'world') ...@@ -93,6 +93,7 @@ print "hello world".startswith(u'world')
print float(u'1.0') print float(u'1.0')
print unichr(97) print unichr(97)
print unichr(23456)
print "hello world".split(u'l') print "hello world".split(u'l')
print "hello world".rsplit(u'l') print "hello world".rsplit(u'l')
......
...@@ -42,6 +42,8 @@ EXIT_CODE_ONLY = False ...@@ -42,6 +42,8 @@ EXIT_CODE_ONLY = False
SKIP_FAILING_TESTS = False SKIP_FAILING_TESTS = False
VERBOSE = 1 VERBOSE = 1
PYTHONIOENCODING = 'utf-8'
# For fun, can test pypy. # For fun, can test pypy.
# Tough because the tester will check to see if the error messages are exactly the # Tough because the tester will check to see if the error messages are exactly the
# same as the system CPython, but the error messages change over micro CPython versions. # same as the system CPython, but the error messages change over micro CPython versions.
...@@ -96,6 +98,7 @@ def get_expected_output(fn): ...@@ -96,6 +98,7 @@ def get_expected_output(fn):
# TODO don't suppress warnings globally: # TODO don't suppress warnings globally:
env = dict(os.environ) env = dict(os.environ)
env["PYTHONPATH"] = EXTMODULE_DIR env["PYTHONPATH"] = EXTMODULE_DIR
env["PYTHONIOENCODING"] = PYTHONIOENCODING
p = subprocess.Popen(["python", "-Wignore", fn], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=open("/dev/null"), preexec_fn=set_ulimits, env=env) p = subprocess.Popen(["python", "-Wignore", fn], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=open("/dev/null"), preexec_fn=set_ulimits, env=env)
out, err = p.communicate() out, err = p.communicate()
code = p.wait() code = p.wait()
...@@ -141,11 +144,13 @@ def run_test(fn, check_stats, run_memcheck): ...@@ -141,11 +144,13 @@ def run_test(fn, check_stats, run_memcheck):
if opts.skip: if opts.skip:
return "(skipped: %s)" % opts.skip return "(skipped: %s)" % opts.skip
run_args = [os.path.abspath(IMAGE)] + opts.jit_args + [fn]
start = time.time()
env = dict(os.environ) env = dict(os.environ)
env["PYTHONPATH"] = EXTMODULE_DIR_PYSTON env["PYTHONPATH"] = EXTMODULE_DIR_PYSTON
p = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=open("/dev/null"), preexec_fn=set_ulimits, env=env) env["PYTHONIOENCODING"] = PYTHONIOENCODING
run_args = [os.path.abspath(IMAGE)] + opts.jit_args + [fn]
start = time.time()
p = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=open("/dev/null"),
preexec_fn=set_ulimits, env=env)
out, stderr = p.communicate() out, stderr = p.communicate()
code = p.wait() code = p.wait()
elapsed = time.time() - start elapsed = time.time() - start
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment