Commit f93c5045 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Parsing: don't read back the cache file we just wrote

Instead, save the file data and parse that instead.
I think that should help with cases where the cached
file was getting trampled.
parent 823f5bf7
...@@ -1037,38 +1037,44 @@ const char* getMagic() { ...@@ -1037,38 +1037,44 @@ const char* getMagic() {
#define LENGTH_LENGTH sizeof(int) #define LENGTH_LENGTH sizeof(int)
#define CHECKSUM_LENGTH 1 #define CHECKSUM_LENGTH 1
enum class ParseResult { // Does at least one of: returns a valid file_data vector, or fills in 'module'
SUCCESS, static std::vector<char> _reparse(const char* fn, const std::string& cache_fn, AST_Module*& module) {
PYC_UNWRITABLE,
};
static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Module*& module) {
FILE* cache_fp = fopen(cache_fn.c_str(), "w"); FILE* cache_fp = fopen(cache_fn.c_str(), "w");
if (!cache_fp)
return ParseResult::PYC_UNWRITABLE;
if (DEBUG_PARSING) { if (DEBUG_PARSING) {
fprintf(stderr, "_reparse('%s', '%s'), pypa=%d\n", fn, cache_fn.c_str(), ENABLE_PYPA_PARSER); fprintf(stderr, "_reparse('%s', '%s'), pypa=%d\n", fn, cache_fn.c_str(), ENABLE_PYPA_PARSER);
fprintf(stderr, "writing magic string: %d %d %d %d\n", getMagic()[0], getMagic()[1], getMagic()[2], fprintf(stderr, "writing magic string: %d %d %d %d\n", getMagic()[0], getMagic()[1], getMagic()[2],
getMagic()[3]); getMagic()[3]);
} }
fwrite(getMagic(), 1, MAGIC_STRING_LENGTH, cache_fp);
int checksum_start = ftell(cache_fp); std::vector<char> file_data;
if (cache_fp)
fwrite(getMagic(), 1, MAGIC_STRING_LENGTH, cache_fp);
file_data.insert(file_data.end(), getMagic(), getMagic() + MAGIC_STRING_LENGTH);
int checksum_start = file_data.size();
int bytes_written = -1; int bytes_written = -1;
static_assert(sizeof(bytes_written) == LENGTH_LENGTH, ""); static_assert(sizeof(bytes_written) == LENGTH_LENGTH, "");
fwrite(&bytes_written, 1, LENGTH_LENGTH, cache_fp); if (cache_fp)
fwrite(&bytes_written, 1, LENGTH_LENGTH, cache_fp);
file_data.insert(file_data.end(), (char*)&bytes_written, (char*)&bytes_written + LENGTH_LENGTH);
bytes_written = 0; bytes_written = 0;
uint8_t checksum = -1; uint8_t checksum = -1;
static_assert(sizeof(checksum) == CHECKSUM_LENGTH, ""); static_assert(sizeof(checksum) == CHECKSUM_LENGTH, "");
fwrite(&checksum, 1, CHECKSUM_LENGTH, cache_fp); if (cache_fp)
fwrite(&checksum, 1, CHECKSUM_LENGTH, cache_fp);
file_data.insert(file_data.end(), (char*)&checksum, (char*)&checksum + CHECKSUM_LENGTH);
checksum = 0; checksum = 0;
if (ENABLE_PYPA_PARSER) { if (ENABLE_PYPA_PARSER) {
module = pypa_parse(fn); module = pypa_parse(fn);
RELEASE_ASSERT(module, "unknown parse error"); RELEASE_ASSERT(module, "unknown parse error");
if (!cache_fp)
return std::vector<char>();
auto p = serializeAST(module, cache_fp); auto p = serializeAST(module, cache_fp);
checksum = p.second; checksum = p.second;
bytes_written += p.first; bytes_written += p.first;
...@@ -1080,7 +1086,10 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Mod ...@@ -1080,7 +1086,10 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Mod
if (nread == 0) if (nread == 0)
break; break;
bytes_written += nread; bytes_written += nread;
fwrite(buf, 1, nread, cache_fp);
if (cache_fp)
fwrite(buf, 1, nread, cache_fp);
file_data.insert(file_data.end(), buf, buf + nread);
for (int i = 0; i < nread; i++) { for (int i = 0; i < nread; i++) {
checksum ^= buf[i]; checksum ^= buf[i];
...@@ -1091,11 +1100,16 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Mod ...@@ -1091,11 +1100,16 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Mod
} }
fseek(cache_fp, checksum_start, SEEK_SET); fseek(cache_fp, checksum_start, SEEK_SET);
fwrite(&bytes_written, 1, LENGTH_LENGTH, cache_fp); if (cache_fp)
fwrite(&checksum, 1, CHECKSUM_LENGTH, cache_fp); fwrite(&bytes_written, 1, LENGTH_LENGTH, cache_fp);
memcpy(&file_data[checksum_start], &bytes_written, LENGTH_LENGTH);
if (cache_fp)
fwrite(&checksum, 1, CHECKSUM_LENGTH, cache_fp);
memcpy(&file_data[checksum_start + LENGTH_LENGTH], &checksum, CHECKSUM_LENGTH);
fclose(cache_fp); if (cache_fp)
return ParseResult::SUCCESS; fclose(cache_fp);
return std::move(file_data);
} }
// Parsing the file is somewhat expensive since we have to shell out to cpython; // Parsing the file is somewhat expensive since we have to shell out to cpython;
...@@ -1120,48 +1134,41 @@ AST_Module* caching_parse_file(const char* fn) { ...@@ -1120,48 +1134,41 @@ AST_Module* caching_parse_file(const char* fn) {
code = stat(fn, &source_stat); code = stat(fn, &source_stat);
assert(code == 0); assert(code == 0);
code = stat(cache_fn.c_str(), &cache_stat); code = stat(cache_fn.c_str(), &cache_stat);
if (code != 0 || cache_stat.st_mtime < source_stat.st_mtime
|| (cache_stat.st_mtime == source_stat.st_mtime && cache_stat.st_mtim.tv_nsec < source_stat.st_mtim.tv_nsec)) {
AST_Module* mod = 0;
auto result = _reparse(fn, cache_fn, mod);
if (mod)
return mod;
if (result == ParseResult::PYC_UNWRITABLE)
return parse_file(fn);
}
static const int MAX_TRIES = 5;
std::vector<char> file_data; std::vector<char> file_data;
int tries = 0; if (code != 0 && (cache_stat.st_mtime > source_stat.st_mtime
while (true) { || (cache_stat.st_mtime == source_stat.st_mtime
oss << "try number " << tries << '\n'; && cache_stat.st_mtim.tv_nsec > source_stat.st_mtim.tv_nsec))) {
FILE* fp = fopen(cache_fn.c_str(), "r"); oss << "reading pyc file\n";
char buf[1024];
bool good = (bool)fp; FILE* cache_fp = fopen(cache_fn.c_str(), "r");
if (cache_fp) {
if (good) {
char buf[1024];
while (true) { while (true) {
int read = fread(buf, 1, 1024, fp); int read = fread(buf, 1, 1024, cache_fp);
for (int i = 0; i < read; i++) file_data.insert(file_data.end(), buf, buf + read);
file_data.push_back(buf[i]);
if (read == 0) { if (read == 0) {
if (ferror(fp)) { if (ferror(cache_fp)) {
oss << "encountered io error reading from the file\n"; oss << "encountered io error reading from the file\n";
if (tries == MAX_TRIES) AST_Module* mod = 0;
fprintf(stderr, "Error reading %s: %s\n", cache_fn.c_str(), strerror(errno)); file_data = _reparse(fn, cache_fn, mod);
good = false; if (mod)
return mod;
assert(file_data.size());
} }
break; break;
} }
} }
fclose(cache_fp);
fclose(fp);
fp = NULL;
} }
}
static const int MAX_TRIES = 5;
int tries = 0;
while (true) {
oss << "try number " << tries << '\n';
bool good = true;
if (file_data.size() < MAGIC_STRING_LENGTH + LENGTH_LENGTH + CHECKSUM_LENGTH) { if (file_data.size() < MAGIC_STRING_LENGTH + LENGTH_LENGTH + CHECKSUM_LENGTH) {
oss << "file not long enough to include header\n"; oss << "file not long enough to include header\n";
...@@ -1243,16 +1250,13 @@ AST_Module* caching_parse_file(const char* fn) { ...@@ -1243,16 +1250,13 @@ AST_Module* caching_parse_file(const char* fn) {
DEBUG_PARSING = true; DEBUG_PARSING = true;
if (!good) { if (!good) {
assert(!fp);
file_data.clear(); file_data.clear();
AST_Module* mod = 0; AST_Module* mod = 0;
auto result = _reparse(fn, cache_fn, mod); file_data = _reparse(fn, cache_fn, mod);
if (mod) if (mod)
return mod; return mod;
assert(file_data.size());
if (result == ParseResult::PYC_UNWRITABLE)
return parse_file(fn);
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment