Commit 6bba2b53 authored by Kevin Modzelewski

Add file_readlines

parent b1e0937b
...@@ -28,6 +28,18 @@ ...@@ -28,6 +28,18 @@
namespace pyston { namespace pyston {
/* Pointer to the character data of string object v.
 * The argument is parenthesized so that a compound pointer expression
 * (e.g. BUF(p + 1)) is cast as a whole rather than only its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject*)(v))
/* Character-read and stream-locking wrappers.
 * With HAVE_GETC_UNLOCKED, GETC maps to getc_unlocked() and the caller is
 * expected to bracket a run of GETC calls with FLOCKFILE/FUNLOCKFILE.
 * Without it, GETC is plain getc() and the lock macros expand to nothing. */
#ifdef HAVE_GETC_UNLOCKED
#define GETC(f) getc_unlocked(f)
#define FLOCKFILE(f) flockfile(f)
#define FUNLOCKFILE(f) funlockfile(f)
#else
#define GETC(f) getc(f)
#define FLOCKFILE(f)
#define FUNLOCKFILE(f)
#endif
/* Bits in f_newlinetypes */ /* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */ #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
#define NEWLINE_CR 1 /* \r newline seen */ #define NEWLINE_CR 1 /* \r newline seen */
...@@ -36,15 +48,24 @@ namespace pyston { ...@@ -36,15 +48,24 @@ namespace pyston {
#define FILE_BEGIN_ALLOW_THREADS(fobj) \ #define FILE_BEGIN_ALLOW_THREADS(fobj) \
{ \ { \
/*fobj->unlocked_count++;*/ \ fobj->unlocked_count++; \
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
#define FILE_END_ALLOW_THREADS(fobj) \ #define FILE_END_ALLOW_THREADS(fobj) \
Py_END_ALLOW_THREADS \ Py_END_ALLOW_THREADS fobj->unlocked_count--; \
/*fobj->unlocked_count--;*/ \ assert(fobj->unlocked_count >= 0); \
/*assert(fobj->unlocked_count >= 0);*/ \
} }
/* Leave a FILE_BEGIN_ALLOW_THREADS section early: re-blocks threads via
 * Py_BLOCK_THREADS and undoes the unlocked_count increment.
 * Unlike FILE_END_ALLOW_THREADS this does not emit the closing brace, so the
 * caller must jump out of the enclosing block afterwards (get_line does so
 * with `continue`). */
#define FILE_ABORT_ALLOW_THREADS(fobj) \
Py_BLOCK_THREADS fobj->unlocked_count--; \
assert(fobj->unlocked_count >= 0);
/* Size of the on-stack buffer file_readlines starts with:
 * BUFSIZ, but never less than 8192 bytes. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif
static BoxedFile* dircheck(BoxedFile* f) { static BoxedFile* dircheck(BoxedFile* f) {
#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR) #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
struct stat buf; struct stat buf;
...@@ -336,6 +357,283 @@ Box* fileNew(BoxedClass* cls, Box* s, Box* m) { ...@@ -336,6 +357,283 @@ Box* fileNew(BoxedClass* cls, Box* s, Box* m) {
return new BoxedFile(f, fn, PyString_AsString(m)); return new BoxedFile(f, fn, PyString_AsString(m));
} }
/* Read one line from f->f_fp and return it as a new string object.
 *
 * n > 0:  return at most n bytes; the buffer is sized to n and never grown.
 * n <= 0: read up to and including the next '\n' (or to EOF), starting with
 *         a 100-byte buffer that grows by 25% each time it fills up.
 *
 * When f->f_univ_newline is set, "\r" and "\r\n" are stored as "\n", the
 * kinds of newline encountered are OR-ed into f->f_newlinetypes, and
 * f->f_skipnextlf remembers a pending "\r" so that a following "\n" is
 * swallowed, including across calls.
 *
 * A read interrupted by a signal (EINTR) is retried after running the
 * signal handlers via PyErr_CheckSignals().
 *
 * Returns NULL on failure: string allocation/resize failure, a non-zero
 * PyErr_CheckSignals() result, a stream error (IOError set from errno), or
 * OverflowError when the line would exceed PY_SSIZE_T_MAX bytes. */
static PyObject* get_line(BoxedFile* f, int n) noexcept {
    FILE* fp = f->f_fp;
    int c;
    char* buf, *end;
    size_t total_v_size; /* total # of slots in buffer */
    size_t used_v_size; /* # used slots in buffer */
    size_t increment; /* amount to increment the buffer */
    PyObject* v;
    /* Work on local copies of the newline state; they are written back to f
     * each time the allow-threads section is left. */
    int newlinetypes = f->f_newlinetypes;
    int skipnextlf = f->f_skipnextlf;
    int univ_newline = f->f_univ_newline;
#if defined(USE_FGETS_IN_GETLINE)
    /* Unbounded read without newline translation: delegate to the
     * fgets-based implementation. */
    if (n <= 0 && !univ_newline)
        return getline_via_fgets(f, fp);
#endif
    total_v_size = n > 0 ? n : 100;
    v = PyString_FromStringAndSize((char*)NULL, total_v_size);
    if (v == NULL)
        return NULL;
    /* buf is the write cursor, end is one past the last usable slot. */
    buf = BUF(v);
    end = buf + total_v_size;
    /* Each iteration fills the buffer until a newline, EOF/error, or the
     * buffer is full; a full buffer is grown and the loop goes around again. */
    for (;;) {
        FILE_BEGIN_ALLOW_THREADS(f)
        FLOCKFILE(fp);
        if (univ_newline) {
            c = 'x'; /* Shut up gcc warning */
            while (buf != end && (c = GETC(fp)) != EOF) {
                if (skipnextlf) {
                    skipnextlf = 0;
                    if (c == '\n') {
                        /* Seeing a \n here with
                         * skipnextlf true means we
                         * saw a \r before.
                         */
                        newlinetypes |= NEWLINE_CRLF;
                        c = GETC(fp);
                        if (c == EOF)
                            break;
                    } else {
                        newlinetypes |= NEWLINE_CR;
                    }
                }
                if (c == '\r') {
                    /* Store the \r as \n now; a directly following \n is
                     * dropped on the next character read. */
                    skipnextlf = 1;
                    c = '\n';
                } else if (c == '\n')
                    newlinetypes |= NEWLINE_LF;
                *buf++ = c;
                if (c == '\n')
                    break;
            }
            if (c == EOF) {
                if (ferror(fp) && errno == EINTR) {
                    /* Leave the allow-threads section by hand: the
                     * `continue` below skips FILE_END_ALLOW_THREADS. */
                    FUNLOCKFILE(fp);
                    FILE_ABORT_ALLOW_THREADS(f)
                    f->f_newlinetypes = newlinetypes;
                    f->f_skipnextlf = skipnextlf;
                    if (PyErr_CheckSignals()) {
                        Py_DECREF(v);
                        return NULL;
                    }
                    /* We executed Python signal handlers and got no exception.
                     * Now back to reading the line where we left off. */
                    clearerr(fp);
                    continue;
                }
                /* Input ended right after a \r: it was a bare CR newline. */
                if (skipnextlf)
                    newlinetypes |= NEWLINE_CR;
            }
        } else /* If not universal newlines use the normal loop */
            while ((c = GETC(fp)) != EOF && (*buf++ = c) != '\n' && buf != end)
                ;
        FUNLOCKFILE(fp);
        FILE_END_ALLOW_THREADS(f)
        f->f_newlinetypes = newlinetypes;
        f->f_skipnextlf = skipnextlf;
        if (c == '\n')
            break;
        if (c == EOF) {
            if (ferror(fp)) {
                if (errno == EINTR) {
                    if (PyErr_CheckSignals()) {
                        Py_DECREF(v);
                        return NULL;
                    }
                    /* We executed Python signal handlers and got no exception.
                     * Now back to reading the line where we left off. */
                    clearerr(fp);
                    continue;
                }
                PyErr_SetFromErrno(PyExc_IOError);
                clearerr(fp);
                Py_DECREF(v);
                return NULL;
            }
            /* Plain EOF: return whatever has been read so far. */
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(v);
                return NULL;
            }
            break;
        }
        /* Must be because buf == end */
        if (n > 0)
            break;
        used_v_size = total_v_size;
        increment = total_v_size >> 2; /* mild exponential growth */
        total_v_size += increment;
        if (total_v_size > PY_SSIZE_T_MAX) {
            PyErr_SetString(PyExc_OverflowError, "line is longer than a Python string can hold");
            Py_DECREF(v);
            return NULL;
        }
        if (_PyString_Resize(&v, total_v_size) < 0)
            return NULL;
        /* The resize may have changed v, so recompute both pointers from it. */
        buf = BUF(v) + used_v_size;
        end = BUF(v) + total_v_size;
    }
    /* Trim the string to the number of bytes actually stored. */
    used_v_size = buf - BUF(v);
    if (used_v_size != total_v_size && _PyString_Resize(&v, used_v_size))
        return NULL;
    return v;
}
/* Set ValueError("I/O operation on closed file").
 * Always returns NULL so callers can write `return err_closed();`. */
static PyObject* err_closed(void) noexcept {
    PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
    return NULL;
}
/* Set IOError("File not open for <action>"), where action names the
 * attempted operation (file_readlines passes "reading").
 * Always returns NULL so callers can write `return err_mode(...);`. */
static PyObject* err_mode(const char* action) noexcept {
    PyErr_Format(PyExc_IOError, "File not open for %s", action);
    return NULL;
}
/* Refuse regular file I/O if there's data in the iteration-buffer.
 * Mixing them would cause data to arrive out of order, as the read*
 * methods don't use the iteration buffer.
 * Sets ValueError and always returns NULL. */
static PyObject* err_iterbuffered(void) noexcept {
    PyErr_SetString(PyExc_ValueError, "Mixing iteration and read methods would lose data");
    return NULL;
}
/* Implementation of file.readlines([sizehint]).
 *
 * Reads the stream in chunks through Py_UniversalNewlineFread, splits the
 * data on '\n' and appends each line (newline included) to a new list.
 * Reading starts in an on-stack buffer of SMALLCHUNK bytes; when a single
 * line does not fit, the data moves to a heap string (big_buffer) whose size
 * is doubled until the line fits.
 *
 * args holds an optional integer sizehint. If it is > 0, reading stops once
 * at least that many bytes have been read and the current chunk has been
 * split; a trailing partial line is then completed with get_line().
 *
 * Returns the list, or NULL with an exception set when the file is closed,
 * not readable, has pending iteration-buffer data, the arguments are
 * invalid, or reading/allocation fails. */
static PyObject* file_readlines(BoxedFile* f, PyObject* args) noexcept {
    long sizehint = 0;
    PyObject* list = NULL;
    PyObject* line;
    char small_buffer[SMALLCHUNK];
    /* buffer points at small_buffer until a line outgrows it, then at the
     * character data of big_buffer. */
    char* buffer = small_buffer;
    size_t buffersize = SMALLCHUNK;
    PyObject* big_buffer = NULL;
    /* nfilled: bytes at the start of buffer holding an incomplete line. */
    size_t nfilled = 0;
    size_t nread;
    size_t totalread = 0;
    char* p, *q, *end;
    int err;
    int shortread = 0; /* bool, did the previous read come up short? */
    if (f->f_fp == NULL)
        return err_closed();
    if (!f->readable)
        return err_mode("reading");
    /* refuse to mix with f.next() */
    if (f->f_buf != NULL && (f->f_bufend - f->f_bufptr) > 0 && f->f_buf[0] != '\0')
        return err_iterbuffered();
    if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
        return NULL;
    if ((list = PyList_New(0)) == NULL)
        return NULL;
    for (;;) {
        /* After a short read, do not read again; treat it as end of input. */
        if (shortread)
            nread = 0;
        else {
            FILE_BEGIN_ALLOW_THREADS(f)
            errno = 0;
            nread = Py_UniversalNewlineFread(buffer + nfilled, buffersize - nfilled, f->f_fp, (PyObject*)f);
            FILE_END_ALLOW_THREADS(f)
            shortread = (nread < buffersize - nfilled);
        }
        if (nread == 0) {
            /* Clearing sizehint marks that the loop ended on EOF/error rather
             * than on the hint; the partial-line code below relies on it. */
            sizehint = 0;
            if (!ferror(f->f_fp))
                break;
            if (errno == EINTR) {
                if (PyErr_CheckSignals()) {
                    goto error;
                }
                /* Interrupted by a signal whose handler did not raise: retry. */
                clearerr(f->f_fp);
                shortread = 0;
                continue;
            }
            PyErr_SetFromErrno(PyExc_IOError);
            clearerr(f->f_fp);
            goto error;
        }
        totalread += nread;
        /* Only the freshly read bytes need scanning: the first nfilled bytes
         * are a leftover that is already known to contain no newline. */
        p = (char*)memchr(buffer + nfilled, '\n', nread);
        if (p == NULL) {
            /* Need a larger buffer to fit this line */
            nfilled += nread;
            buffersize *= 2;
            if (buffersize > PY_SSIZE_T_MAX) {
                PyErr_SetString(PyExc_OverflowError, "line is longer than a Python string can hold");
                goto error;
            }
            if (big_buffer == NULL) {
                /* Create the big buffer */
                big_buffer = PyString_FromStringAndSize(NULL, buffersize);
                if (big_buffer == NULL)
                    goto error;
                buffer = PyString_AS_STRING(big_buffer);
                memcpy(buffer, small_buffer, nfilled);
            } else {
                /* Grow the big buffer */
                if (_PyString_Resize(&big_buffer, buffersize) < 0)
                    goto error;
                buffer = PyString_AS_STRING(big_buffer);
            }
            continue;
        }
        end = buffer + nfilled + nread;
        /* q marks the start of the current line, p its terminating '\n'. */
        q = buffer;
        do {
            /* Process complete lines */
            p++;
            line = PyString_FromStringAndSize(q, p - q);
            if (line == NULL)
                goto error;
            err = PyList_Append(list, line);
            Py_DECREF(line);
            if (err != 0)
                goto error;
            q = p;
            p = (char*)memchr(q, '\n', end - q);
        } while (p != NULL);
        /* Move the remaining incomplete line to the start */
        nfilled = end - q;
        memmove(buffer, q, nfilled);
        if (sizehint > 0)
            if (totalread >= (size_t)sizehint)
                break;
    }
    if (nfilled != 0) {
        /* Partial last line */
        line = PyString_FromStringAndSize(buffer, nfilled);
        if (line == NULL)
            goto error;
        /* sizehint is still > 0 only when the loop stopped because of the
         * hint, i.e. the rest of this line has not been read yet. */
        if (sizehint > 0) {
            /* Need to complete the last line */
            PyObject* rest = get_line(f, 0);
            if (rest == NULL) {
                Py_DECREF(line);
                goto error;
            }
            PyString_Concat(&line, rest);
            Py_DECREF(rest);
            if (line == NULL)
                goto error;
        }
        err = PyList_Append(list, line);
        Py_DECREF(line);
        if (err != 0)
            goto error;
    }
/* Shared exit: release the heap buffer (if any) and return list, which the
 * error path has set to NULL. */
cleanup:
    Py_XDECREF(big_buffer);
    return list;
error:
    Py_CLEAR(list);
    goto cleanup;
}
Box* fileIterNext(BoxedFile* s) { Box* fileIterNext(BoxedFile* s) {
return fileReadline1(s); return fileReadline1(s);
} }
...@@ -384,11 +682,6 @@ extern "C" int PyFile_WriteObject(PyObject* v, PyObject* f, int flags) noexcept ...@@ -384,11 +682,6 @@ extern "C" int PyFile_WriteObject(PyObject* v, PyObject* f, int flags) noexcept
} }
} }
/* Set ValueError("I/O operation on closed file").
 * Always returns NULL so callers can write `return err_closed();`. */
static PyObject* err_closed(void) noexcept {
    PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
    return NULL;
}
extern "C" int PyFile_WriteString(const char* s, PyObject* f) noexcept { extern "C" int PyFile_WriteString(const char* s, PyObject* f) noexcept {
if (f == NULL) { if (f == NULL) {
/* Should be caused by a pre-existing error */ /* Should be caused by a pre-existing error */
...@@ -396,7 +689,7 @@ extern "C" int PyFile_WriteString(const char* s, PyObject* f) noexcept { ...@@ -396,7 +689,7 @@ extern "C" int PyFile_WriteString(const char* s, PyObject* f) noexcept {
PyErr_SetString(PyExc_SystemError, "null file for PyFile_WriteString"); PyErr_SetString(PyExc_SystemError, "null file for PyFile_WriteString");
return -1; return -1;
} else if (PyFile_Check(f)) { } else if (PyFile_Check(f)) {
PyFileObject* fobj = (PyFileObject*)f; BoxedFile* fobj = (BoxedFile*)f;
FILE* fp = PyFile_AsFile(f); FILE* fp = PyFile_AsFile(f);
if (fp == NULL) { if (fp == NULL) {
err_closed(); err_closed();
...@@ -519,6 +812,93 @@ extern "C" int PyFile_SoftSpace(PyObject* f, int newflag) noexcept { ...@@ -519,6 +812,93 @@ extern "C" int PyFile_SoftSpace(PyObject* f, int newflag) noexcept {
} }
} }
/*
** Py_UniversalNewlineFread is an fread variation that understands
** all of the \r, \n and \r\n newline conventions.
** The stream should be opened in binary mode.
**
** buf/n:   destination buffer and its capacity in bytes.
** stream:  the FILE to read from.
** fobj:    the file object owning the stream; it is accessed as a BoxedFile.
**          If it is NULL or fails PyFile_Check, errno is set to ENXIO and
**          0 is returned.
**
** There is no readahead; instead a flag (f_skipnextlf) is used to skip a
** following \n on the next read. If the file object does not have
** f_univ_newline set, the whole conversion is skipped and the call is a
** plain fread. Finally, the routine keeps track of the different types of
** newlines seen (f_newlinetypes).
**
** Returns the number of bytes stored in buf after translation.
*/
extern "C" size_t Py_UniversalNewlineFread(char* buf, size_t n, FILE* stream, PyObject* fobj) noexcept {
    char* dst = buf;
    BoxedFile* f = (BoxedFile*)fobj;
    int newlinetypes, skipnextlf;
    assert(buf != NULL);
    assert(stream != NULL);
    if (!fobj || !PyFile_Check(fobj)) {
        errno = ENXIO; /* What can you do... */
        return 0;
    }
    if (!f->f_univ_newline)
        return fread(buf, 1, n, stream);
    newlinetypes = f->f_newlinetypes;
    skipnextlf = f->f_skipnextlf;
    /* Invariant: n is the number of bytes remaining to be filled
     * in the buffer.
     */
    while (n) {
        size_t nread;
        int shortread;
        /* Raw bytes are read to dst and translated in place: src walks the
         * raw bytes while dst trails it as the output cursor. */
        char* src = dst;
        nread = fread(dst, 1, n, stream);
        assert(nread <= n);
        if (nread == 0)
            break;
        n -= nread; /* assuming 1 byte out for each in; will adjust */
        shortread = n != 0; /* true iff EOF or error */
        while (nread--) {
            char c = *src++;
            if (c == '\r') {
                /* Save as LF and set flag to skip next LF. */
                /* NOTE(review): a \r directly after another \r does not
                 * record NEWLINE_CR here, whereas get_line() does for the
                 * same input; only visible for "\r\r\n". Confirm whether
                 * this is intended. */
                *dst++ = '\n';
                skipnextlf = 1;
            } else if (skipnextlf && c == '\n') {
                /* Skip LF, and remember we saw CR LF. */
                skipnextlf = 0;
                newlinetypes |= NEWLINE_CRLF;
                /* The dropped LF frees one slot, so the outer loop may read
                 * one more byte to fill it. */
                ++n;
            } else {
                /* Normal char to be stored in buffer. Also
                 * update the newlinetypes flag if either this
                 * is an LF or the previous char was a CR.
                 */
                if (c == '\n')
                    newlinetypes |= NEWLINE_LF;
                else if (skipnextlf)
                    newlinetypes |= NEWLINE_CR;
                *dst++ = c;
                skipnextlf = 0;
            }
        }
        if (shortread) {
            /* If this is EOF, update type flags. */
            if (skipnextlf && feof(stream))
                newlinetypes |= NEWLINE_CR;
            break;
        }
    }
    /* Publish the updated newline state back to the file object. */
    f->f_newlinetypes = newlinetypes;
    f->f_skipnextlf = skipnextlf;
    return dst - buf;
}
/* Docstring attached to file.readlines. */
PyDoc_STRVAR(readlines_doc, "readlines([size]) -> list of strings, each a line from the file.\n"
                            "\n"
                            "Call readline() repeatedly and return a list of the lines so read.\n"
                            "The optional size argument, if given, is an approximate bound on the\n"
                            "total number of bytes in the lines returned.");
/* File methods implemented with the C-API calling convention (METH_VARARGS).
 * setupFile() wraps each entry in a BoxedMethodDescriptor and attaches it to
 * file_cls under the entry's ml_name. */
PyMethodDef file_methods[] = {
    { "readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc },
};
void setupFile() { void setupFile() {
file_cls->giveAttr("read", file_cls->giveAttr("read",
new BoxedFunction(boxRTFunction((void*)fileRead, STR, 2, 1, false, false), { boxInt(-1) })); new BoxedFunction(boxRTFunction((void*)fileRead, STR, 2, 1, false, false), { boxInt(-1) }));
...@@ -545,6 +925,10 @@ void setupFile() { ...@@ -545,6 +925,10 @@ void setupFile() {
file_cls->giveAttr("__new__", new BoxedFunction(boxRTFunction((void*)fileNew, UNKNOWN, 3, 1, false, false), file_cls->giveAttr("__new__", new BoxedFunction(boxRTFunction((void*)fileNew, UNKNOWN, 3, 1, false, false),
{ boxStrConstant("r") })); { boxStrConstant("r") }));
for (auto& md : file_methods) {
file_cls->giveAttr(md.ml_name, new BoxedMethodDescriptor(&md, file_cls));
}
file_cls->freeze(); file_cls->freeze();
} }
......
...@@ -1833,6 +1833,23 @@ extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) noexcept { ...@@ -1833,6 +1833,23 @@ extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) noexcept {
return 0; return 0;
} }
extern "C" void PyString_Concat(register PyObject** pv, register PyObject* w) noexcept {
try {
if (*pv == NULL)
return;
if (w == NULL || !PyString_Check(*pv)) {
*pv = NULL;
return;
}
*pv = strAdd((BoxedString*)*pv, w);
} catch (ExcInfo e) {
setCAPIException(e);
*pv = NULL;
}
}
extern "C" void PyString_ConcatAndDel(register PyObject** pv, register PyObject* w) noexcept { extern "C" void PyString_ConcatAndDel(register PyObject** pv, register PyObject* w) noexcept {
Py_FatalError("unimplemented"); Py_FatalError("unimplemented");
} }
......
...@@ -1139,6 +1139,8 @@ void setupRuntime() { ...@@ -1139,6 +1139,8 @@ void setupRuntime() {
closure_cls->freeze(); closure_cls->freeze();
setupCAPI();
setupBool(); setupBool();
setupInt(); setupInt();
setupLong(); setupLong();
...@@ -1206,8 +1208,6 @@ void setupRuntime() { ...@@ -1206,8 +1208,6 @@ void setupRuntime() {
setupThread(); setupThread();
setupGC(); setupGC();
setupCAPI();
PyType_Ready(&PyCapsule_Type); PyType_Ready(&PyCapsule_Type);
initerrno(); initerrno();
......
...@@ -39,6 +39,9 @@ with open('README.md') as f: ...@@ -39,6 +39,9 @@ with open('README.md') as f:
print lines[:5] print lines[:5]
print lines[-5:] print lines[-5:]
with open('README.md') as f:
print len(f.readlines())
# Check that opening a non-existent file results in an IOError. # Check that opening a non-existent file results in an IOError.
try: try:
f = open('this-should-definitely-not-exist.txt') f = open('this-should-definitely-not-exist.txt')
...@@ -46,6 +49,6 @@ except IOError as e: ...@@ -46,6 +49,6 @@ except IOError as e:
print str(e) print str(e)
f = open("/dev/null", "w") f = open("/dev/null", "w")
f.write("hello world") print f.write("hello world")
print f.flush() print f.flush()
f.close() print f.close()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment