Commit 6fbad08e authored by Marius Wachtler

Add support for unicode literals when using pypa

We support the \u, \U and \N escape sequences and
the unicode_literals option.
In addition, this commit updates pypa to the latest version, which has unicode support.
parent 075620c7
Subproject commit 94fd3e1551188171fca8fb1d4bb7e2f916be33c4
Subproject commit 2ce3f0ef83f6d3d4bdd1ab841e2ca4c3417d93a4
......@@ -31,6 +31,9 @@
#include "core/stats.h"
#include "core/types.h"
#include "core/util.h"
#include "runtime/capi.h"
#include "runtime/objmodel.h"
#include "runtime/types.h"
namespace pypa {
bool string_to_double(String const& s, double& result);
......@@ -511,7 +514,7 @@ struct expr_dispatcher {
// Builds an AST_Str node from a pypa string-literal node.
// location(ptr, s) is called on the new node first; str_type is then set to
// AST_Str::UNICODE when s.unicode is set and to AST_Str::STR otherwise, and
// str_data receives s.value. The new node is returned.
ResultPtr read(pypa::AstStr& s) {
    AST_Str* ptr = new AST_Str();
    location(ptr, s);
    // A single assignment: an unconditional STR store here would be
    // overwritten immediately by the unicode-aware one.
    ptr->str_type = s.unicode ? AST_Str::UNICODE : AST_Str::STR;
    ptr->str_data = s.value;
    return ptr;
}
......@@ -799,7 +802,7 @@ struct stmt_dispatcher {
location(ptr, d);
AST_Str* str = new AST_Str();
ptr->value = str;
str->str_type = AST_Str::STR;
str->str_type = d.unicode ? AST_Str::UNICODE : AST_Str::STR;
str->str_data = d.doc;
return ptr;
}
......@@ -823,14 +826,32 @@ AST_Module* readModule(pypa::AstModule& t) {
}
// Error callback for pypa. Errors whose type is SyntaxWarning are ignored;
// every other error is forwarded to raiseSyntaxError with the message, line,
// column and file name carried by the pypa::Error.
void pypaErrorHandler(pypa::Error e) {
    const bool is_warning = (e.type == pypa::ErrorType::SyntaxWarning);
    if (is_warning)
        return;

    // An empty string is passed as the final (function name) argument.
    raiseSyntaxError(e.message.c_str(), e.cur.line, e.cur.column, e.file_name, std::string());
}
// Unicode-escape decoding callback for pypa.
//
// Decodes `s` with PyUnicode_DecodeRawUnicodeEscape when `raw_prefix` is true
// and with PyUnicode_DecodeUnicodeEscape otherwise (both in "strict" mode),
// then returns the result encoded as UTF-8.
//
// `error` is an out-parameter: it is false on success. If either C-API step
// throws, `error` is set to true and the return value is str() of the
// exception value, or a fixed fallback message when that is not a plain str.
pypa::String pypaUnicodeEscapeDecoder(pypa::String s, bool raw_prefix, bool& error) {
    error = false;
    try {
        Box* decoded = raw_prefix ? PyUnicode_DecodeRawUnicodeEscape(s.c_str(), s.size(), "strict")
                                  : PyUnicode_DecodeUnicodeEscape(s.c_str(), s.size(), "strict");
        // Convert a pending C-API error (if any) into a thrown ExcInfo.
        checkAndThrowCAPIException();

        BoxedString* utf8 = (BoxedString*)PyUnicode_AsUTF8String(decoded);
        checkAndThrowCAPIException();
        return utf8->s;
    } catch (ExcInfo e) {
        error = true;
        BoxedString* msg = str(e.value);
        const bool usable = msg && msg->cls == str_cls;
        if (!usable)
            return "Encountered an unknown error inside pypaUnicodeEscapeDecoder";
        return msg->s;
    }
}
AST_Module* pypa_parse(char const* file_path) {
pypa::Lexer lexer(file_path);
pypa::SymbolTablePtr symbols;
......@@ -842,6 +863,7 @@ AST_Module* pypa_parse(char const* file_path) {
options.python3only = false;
options.handle_future_errors = false;
options.error_handler = pypaErrorHandler;
options.unicode_escape_handler = pypaUnicodeEscapeDecoder;
if (pypa::parse(lexer, module, symbols, options) && module) {
return readModule(*module);
......
......@@ -231,6 +231,7 @@ static int main(int argc, char** argv) {
add_history(line);
try {
AST_Module* m = parse_string(line);
Timer _t("repl");
......@@ -253,7 +254,6 @@ static int main(int argc, char** argv) {
m->body[0] = p;
}
try {
compileAndRunModule(m, main_module);
} catch (ExcInfo e) {
int retcode = 0xdeadbeef; // should never be seen
......
......@@ -118,11 +118,9 @@ void raiseSyntaxError(const char* msg, int lineno, int col_offset, const std::st
Box* exc = runtimeCall(SyntaxError, ArgPassSpec(1), boxStrConstant(msg), NULL, NULL, NULL, NULL);
auto tb = getTraceback();
// TODO: push the syntax error line back on it:
//// TODO: leaks this!
// last_tb.push_back(new LineInfo(lineno, col_offset, file, func));
raiseRaw(ExcInfo(exc->cls, exc, tb));
std::vector<const LineInfo*> entries = tb->lines;
entries.push_back(new LineInfo(lineno, col_offset, file, func));
raiseRaw(ExcInfo(exc->cls, exc, new BoxedTraceback(std::move(entries))));
}
void _printStacktrace() {
......
# skip-if: '-x' in EXTRA_JIT_ARGS
from StringIO import StringIO
import json
......
# skip-if: '-x' in EXTRA_JIT_ARGS
def f(a):
print a
......
# skip-if: '-x' in EXTRA_JIT_ARGS
print repr(unicode())
print repr(unicode('hello world'))
......@@ -32,6 +30,7 @@ print u"Hello " + " World"
def p(x):
return [hex(ord(i)) for i in x]
s = u"\u20AC" # euro sign
print p(u"\N{EURO SIGN}")
print p(s)
print p(s.encode("utf8"))
print p(s.encode("utf16"))
......
# skip-if: '-x' in EXTRA_JIT_ARGS
import unicodedata
print unicodedata.lookup("EURO SIGN") == u"\u20ac"
print unicodedata.name(u"/")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment