Commit 729a81c5 authored by Marius Wachtler's avatar Marius Wachtler

Compress cache files and remove old cache files.

Adds the LZ4 compression library and uses it for compressing cached objects.
This saves a lot of space (in my test it reduces the required space to about one-tenth),
and adds a checksum to the file in order to detect truncated cache files,
without reducing the speed.
parent b6ebc815
...@@ -10,3 +10,6 @@ ...@@ -10,3 +10,6 @@
[submodule "test/integration/virtualenv"] [submodule "test/integration/virtualenv"]
path = test/integration/virtualenv path = test/integration/virtualenv
url = https://github.com/dropbox/virtualenv url = https://github.com/dropbox/virtualenv
[submodule "lz4"]
path = lz4
url = git://github.com/Cyan4973/lz4.git
...@@ -116,6 +116,10 @@ ExternalProject_Add(libunwind ...@@ -116,6 +116,10 @@ ExternalProject_Add(libunwind
add_subdirectory(libpypa) add_subdirectory(libpypa)
add_dependencies(pypa gitsubmodules) add_dependencies(pypa gitsubmodules)
# lz4
add_subdirectory(lz4/cmake_unofficial)
add_dependencies(lz4 gitsubmodules)
# valgrind # valgrind
if(ENABLE_VALGRIND) if(ENABLE_VALGRIND)
find_package(Valgrind REQUIRED) find_package(Valgrind REQUIRED)
...@@ -182,7 +186,7 @@ add_subdirectory(tools) ...@@ -182,7 +186,7 @@ add_subdirectory(tools)
add_executable(pyston $<TARGET_OBJECTS:PYSTON_MAIN_OBJECT> $<TARGET_OBJECTS:PYSTON_OBJECTS> $<TARGET_OBJECTS:FROM_CPYTHON>) add_executable(pyston $<TARGET_OBJECTS:PYSTON_MAIN_OBJECT> $<TARGET_OBJECTS:PYSTON_OBJECTS> $<TARGET_OBJECTS:FROM_CPYTHON>)
# Wrap the stdlib in --whole-archive to force all the symbols to be included and eventually exported # Wrap the stdlib in --whole-archive to force all the symbols to be included and eventually exported
target_link_libraries(pyston -Wl,--whole-archive stdlib -Wl,--no-whole-archive pthread m readline sqlite3 gmp ssl crypto unwind pypa double-conversion ${LLVM_LIBS} ${LIBLZMA_LIBRARIES} ${OPTIONAL_LIBRARIES}) target_link_libraries(pyston -Wl,--whole-archive stdlib -Wl,--no-whole-archive pthread m readline sqlite3 gmp ssl crypto unwind pypa liblz4 double-conversion ${LLVM_LIBS} ${LIBLZMA_LIBRARIES} ${OPTIONAL_LIBRARIES})
# copy src/codegen/parse_ast.py to the build directory # copy src/codegen/parse_ast.py to the build directory
add_custom_command(TARGET pyston POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_SOURCE_DIR}/src/codegen/parse_ast.py ${CMAKE_BINARY_DIR}/src/codegen/parse_ast.py) add_custom_command(TARGET pyston POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_SOURCE_DIR}/src/codegen/parse_ast.py ${CMAKE_BINARY_DIR}/src/codegen/parse_ast.py)
......
...@@ -88,3 +88,32 @@ products or services of Licensee, or any third party. ...@@ -88,3 +88,32 @@ products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee 8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License agrees to be bound by the terms and conditions of this License
Agreement. Agreement.
------
LZ4 Library
Copyright (c) 2011-2014, Yann Collet
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
------
...@@ -155,6 +155,7 @@ COMMON_CXXFLAGS += -fexceptions -fno-rtti ...@@ -155,6 +155,7 @@ COMMON_CXXFLAGS += -fexceptions -fno-rtti
COMMON_CXXFLAGS += -Wno-invalid-offsetof # allow the use of "offsetof", and we'll just have to make sure to only use it legally. COMMON_CXXFLAGS += -Wno-invalid-offsetof # allow the use of "offsetof", and we'll just have to make sure to only use it legally.
COMMON_CXXFLAGS += -DENABLE_INTEL_JIT_EVENTS=$(ENABLE_INTEL_JIT_EVENTS) COMMON_CXXFLAGS += -DENABLE_INTEL_JIT_EVENTS=$(ENABLE_INTEL_JIT_EVENTS)
COMMON_CXXFLAGS += -I$(DEPS_DIR)/pypa-install/include COMMON_CXXFLAGS += -I$(DEPS_DIR)/pypa-install/include
COMMON_CXXFLAGS += -I$(DEPS_DIR)/lz4-install/include
ifeq ($(ENABLE_VALGRIND),0) ifeq ($(ENABLE_VALGRIND),0)
COMMON_CXXFLAGS += -DNVALGRIND COMMON_CXXFLAGS += -DNVALGRIND
...@@ -170,6 +171,7 @@ COMMON_CXXFLAGS += -DDEFAULT_PYTHON_MAJOR_VERSION=$(PYTHON_MAJOR_VERSION) -DDEFA ...@@ -170,6 +171,7 @@ COMMON_CXXFLAGS += -DDEFAULT_PYTHON_MAJOR_VERSION=$(PYTHON_MAJOR_VERSION) -DDEFA
# Use our "custom linker" that calls gold if available # Use our "custom linker" that calls gold if available
COMMON_LDFLAGS := -B$(TOOLS_DIR)/build_system -L/usr/local/lib -lpthread -lm -lunwind -llzma -L$(DEPS_DIR)/gcc-4.8.2-install/lib64 -lreadline -lgmp -lssl -lcrypto -lsqlite3 COMMON_LDFLAGS := -B$(TOOLS_DIR)/build_system -L/usr/local/lib -lpthread -lm -lunwind -llzma -L$(DEPS_DIR)/gcc-4.8.2-install/lib64 -lreadline -lgmp -lssl -lcrypto -lsqlite3
COMMON_LDFLAGS += $(DEPS_DIR)/pypa-install/lib/libpypa.a COMMON_LDFLAGS += $(DEPS_DIR)/pypa-install/lib/libpypa.a
COMMON_LDFLAGS += $(DEPS_DIR)/lz4-install/lib/liblz4.a
# Conditionally add libtinfo if available - otherwise nothing will be added # Conditionally add libtinfo if available - otherwise nothing will be added
COMMON_LDFLAGS += `pkg-config tinfo 2>/dev/null && pkg-config tinfo --libs || echo ""` COMMON_LDFLAGS += `pkg-config tinfo 2>/dev/null && pkg-config tinfo --libs || echo ""`
......
...@@ -140,6 +140,15 @@ cd gtest-1.7.0 ...@@ -140,6 +140,15 @@ cd gtest-1.7.0
make -j4 make -j4
``` ```
### LZ4
```
cd ~/pyston_deps
git clone git://github.com/Cyan4973/lz4.git
mkdir lz4-install
cd lz4/lib
DESTDIR="$HOME/pyston_deps/lz4-install" PREFIX="/" make install
```
--- ---
At this point you should be able to run `make check` (in the `~/pyston` directory) and pass the tests. See the main README for more information about available targets and options. At this point you should be able to run `make check` (in the `~/pyston` directory) and pass the tests. See the main README for more information about available targets and options.
......
Subproject commit 160661c7a4cbf805f4af74d2e3932a17a66e6ce7
...@@ -8,6 +8,7 @@ set_source_files_properties(jit.cpp PROPERTIES COMPILE_DEFINITIONS "GITREV=${GIT ...@@ -8,6 +8,7 @@ set_source_files_properties(jit.cpp PROPERTIES COMPILE_DEFINITIONS "GITREV=${GIT
include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR})
include_directories(${CMAKE_BINARY_DIR}/libunwind/include) include_directories(${CMAKE_BINARY_DIR}/libunwind/include)
include_directories(${CMAKE_SOURCE_DIR}/libpypa/src) include_directories(${CMAKE_SOURCE_DIR}/libpypa/src)
include_directories(${CMAKE_SOURCE_DIR}/lz4/lib)
if(ENABLE_GPERFTOOLS) if(ENABLE_GPERFTOOLS)
set(OPTIONAL_SRCS ${OPTIONAL_SRCS} codegen/profiling/pprof.cpp) set(OPTIONAL_SRCS ${OPTIONAL_SRCS} codegen/profiling/pprof.cpp)
...@@ -109,7 +110,7 @@ add_library(PYSTON_OBJECTS OBJECT ${OPTIONAL_SRCS} ...@@ -109,7 +110,7 @@ add_library(PYSTON_OBJECTS OBJECT ${OPTIONAL_SRCS}
add_dependencies(PYSTON_OBJECTS libunwind pypa ${LLVM_LIBS}) add_dependencies(PYSTON_OBJECTS libunwind pypa ${LLVM_LIBS})
add_library(PYSTON_MAIN_OBJECT OBJECT jit.cpp) add_library(PYSTON_MAIN_OBJECT OBJECT jit.cpp)
add_dependencies(PYSTON_MAIN_OBJECT libunwind pypa ${LLVM_LIBS}) add_dependencies(PYSTON_MAIN_OBJECT libunwind pypa liblz4 ${LLVM_LIBS})
# build stdlib # build stdlib
add_subdirectory(runtime/inline) add_subdirectory(runtime/inline)
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <cstdio> #include <cstdio>
#include <iostream> #include <iostream>
#include <lz4frame.h>
#include <openssl/evp.h> #include <openssl/evp.h>
#include <unordered_map> #include <unordered_map>
...@@ -120,6 +121,71 @@ static llvm::Module* loadStdlib() { ...@@ -120,6 +121,71 @@ static llvm::Module* loadStdlib() {
return m; return m;
} }
// Reads and writes files whose whole content is a single LZ4 frame.
//
// The frame is written with the uncompressed size recorded in its header and with the
// content checksum enabled, so that getFile() can reject truncated or corrupted files.
class CompressedFile {
public:
    // Compresses `data` into one LZ4 frame and stores it in `file_name`.
    //
    // Returns true only if the data was compressed and completely written; returns false if
    // compression fails, the file cannot be opened, or the write reports an I/O error.
    static bool writeFile(llvm::StringRef file_name, llvm::StringRef data) {
        LZ4F_preferences_t preferences;
        memset(&preferences, 0, sizeof(preferences));
        preferences.frameInfo.contentChecksumFlag = contentChecksumEnabled;
        preferences.frameInfo.contentSize = data.size();

        // Compress before touching the file system, so that a compression failure does not
        // leave an empty file behind.
        std::vector<char> compressed;
        size_t max_size = LZ4F_compressFrameBound(data.size(), &preferences);
        compressed.resize(max_size);
        size_t compressed_size = LZ4F_compressFrame(&compressed[0], max_size, data.data(), data.size(), &preferences);
        if (LZ4F_isError(compressed_size))
            return false;

        std::error_code error_code;
        llvm::raw_fd_ostream file(file_name, error_code, llvm::sys::fs::F_RW);
        if (error_code)
            return false;

        file.write(compressed.data(), compressed_size);
        // Close explicitly so that buffered data is flushed and any I/O error becomes visible here.
        file.close();
        if (file.has_error()) {
            // An error that is still set when the stream is destroyed is reported as a fatal
            // error by LLVM; a failed cache write must not take the process down.
            file.clear_error();
            return false;
        }
        return true;
    }

    // Loads `file_name`, decompresses the LZ4 frame it contains and returns the content in a
    // freshly allocated buffer.
    //
    // Returns an empty pointer if the file cannot be read, is not a valid LZ4 frame, is
    // truncated, or if the decompressed size does not match the size stored in the header.
    static std::unique_ptr<llvm::MemoryBuffer> getFile(llvm::StringRef file_name) {
        auto compressed_content = llvm::MemoryBuffer::getFile(file_name, -1, false);
        if (!compressed_content)
            return std::unique_ptr<llvm::MemoryBuffer>();

        LZ4F_decompressionContext_t context;
        if (LZ4F_isError(LZ4F_createDecompressionContext(&context, LZ4F_VERSION)))
            return std::unique_ptr<llvm::MemoryBuffer>();

        LZ4F_frameInfo_t frame_info;
        memset(&frame_info, 0, sizeof(frame_info));

        const char* start = (*compressed_content)->getBufferStart();
        size_t pos = 0;
        size_t compressed_size = (*compressed_content)->getBufferSize();
        size_t remaining = compressed_size - pos;
        // On success `remaining` is updated to the number of header bytes consumed. A header
        // that cannot be parsed (e.g. an empty or foreign file) must be rejected here instead
        // of being mistaken for an empty but valid cache entry.
        if (LZ4F_isError(LZ4F_getFrameInfo(context, &frame_info, start, &remaining))) {
            LZ4F_freeDecompressionContext(context);
            return std::unique_ptr<llvm::MemoryBuffer>();
        }
        pos += remaining;

        // The content size comes from the file and therefore cannot be trusted: only use it as
        // a bounded allocation hint. The vector still grows on demand beyond this limit.
        const size_t max_reserve = 64 * 1024 * 1024;
        std::vector<char> uncompressed;
        uncompressed.reserve(frame_info.contentSize < max_reserve ? (size_t)frame_info.contentSize : max_reserve);

        // LZ4F_decompress() returns 0 once the frame (including its checksum) has been fully
        // decoded; start with a non-zero value so that a file which ends right after the header
        // is treated as incomplete.
        size_t frame_status = 1;
        while (pos < compressed_size) {
            unsigned char buff[4096];
            size_t buff_size = sizeof(buff);
            remaining = compressed_size - pos;
            // In: sizes of the output/input buffers. Out: bytes produced / bytes consumed.
            frame_status = LZ4F_decompress(context, buff, &buff_size, start + pos, &remaining, NULL);
            if (LZ4F_isError(frame_status)) {
                LZ4F_freeDecompressionContext(context);
                return std::unique_ptr<llvm::MemoryBuffer>();
            }
            pos += remaining;
            if (buff_size != 0)
                uncompressed.insert(uncompressed.end(), buff, buff + buff_size);
        }

        LZ4F_freeDecompressionContext(context);

        // A non-zero status means the decoder still expects more input, i.e. the file is truncated.
        if (frame_status != 0)
            return std::unique_ptr<llvm::MemoryBuffer>();
        if (uncompressed.size() != frame_info.contentSize)
            return std::unique_ptr<llvm::MemoryBuffer>();

        return llvm::MemoryBuffer::getMemBufferCopy(llvm::StringRef(uncompressed.data(), uncompressed.size()));
    }
};
class PystonObjectCache : public llvm::ObjectCache { class PystonObjectCache : public llvm::ObjectCache {
private: private:
// Stream which calculates the SHA256 hash of the data written to it. // Stream which calculates the SHA256 hash of the data written to it.
...@@ -154,15 +220,18 @@ private: ...@@ -154,15 +220,18 @@ private:
} }
}; };
llvm::SmallString<128> cache_dir; llvm::SmallString<128> cache_dir;
std::string module_identifier; std::string module_identifier;
std::string hash_before_codegen; std::string hash_before_codegen;
public: public:
PystonObjectCache() { PystonObjectCache() {
llvm::sys::fs::current_path(cache_dir); llvm::sys::path::home_directory(cache_dir);
llvm::sys::path::append(cache_dir, "pyston_object_cache"); llvm::sys::path::append(cache_dir, ".cache");
llvm::sys::path::append(cache_dir, "pyston");
llvm::sys::path::append(cache_dir, "object_cache");
cleanupCacheDirectory();
} }
...@@ -177,14 +246,10 @@ public: ...@@ -177,14 +246,10 @@ public:
llvm::SmallString<128> cache_file = cache_dir; llvm::SmallString<128> cache_file = cache_dir;
llvm::sys::path::append(cache_file, hash_before_codegen); llvm::sys::path::append(cache_file, hash_before_codegen);
if (!llvm::sys::fs::exists(cache_dir.str()) && llvm::sys::fs::create_directory(cache_dir.str())) { if (!llvm::sys::fs::exists(cache_dir.str()) && llvm::sys::fs::create_directories(cache_dir.str()))
fprintf(stderr, "Unable to create cache directory\n");
return; return;
}
std::error_code error_code; CompressedFile::writeFile(cache_file, Obj.getBuffer());
llvm::raw_fd_ostream IRObjectFile(cache_file.c_str(), error_code, llvm::sys::fs::F_RW);
RELEASE_ASSERT(!error_code, "");
IRObjectFile << Obj.getBuffer();
} }
#if LLVMREV < 215566 #if LLVMREV < 215566
...@@ -211,15 +276,15 @@ public: ...@@ -211,15 +276,15 @@ public:
// - clear the cache directory // - clear the cache directory
// - run pyston // - run pyston
// - run pyston a second time // - run pyston a second time
// - Now look for "*_second" files in the cache directory and compare them to the "*_first" IR dump // - Now look for "*_second.ll" files in the cache directory and compare them to the "*_first.ll" IR dump
std::string llvm_ir; std::string llvm_ir;
llvm::raw_string_ostream sstr(llvm_ir); llvm::raw_string_ostream sstr(llvm_ir);
M->print(sstr, 0); M->print(sstr, 0);
sstr.flush(); sstr.flush();
std::string filename = cache_dir.str().str() + "/" + module_identifier + "_first"; std::string filename = cache_dir.str().str() + "/" + module_identifier + "_first.ll";
if (llvm::sys::fs::exists(filename)) if (llvm::sys::fs::exists(filename))
filename = cache_dir.str().str() + "/" + module_identifier + "_second"; filename = cache_dir.str().str() + "/" + module_identifier + "_second.ll";
FILE* f = fopen(filename.c_str(), "wt"); FILE* f = fopen(filename.c_str(), "wt");
fwrite(llvm_ir.c_str(), 1, llvm_ir.size(), f); fwrite(llvm_ir.c_str(), 1, llvm_ir.size(), f);
fclose(f); fclose(f);
...@@ -230,19 +295,40 @@ public: ...@@ -230,19 +295,40 @@ public:
return NULL; return NULL;
} }
auto rtn = llvm::MemoryBuffer::getFile(cache_file.str(), -1, false); std::unique_ptr<llvm::MemoryBuffer> mem_buff = CompressedFile::getFile(cache_file);
if (!rtn) { if (!mem_buff) {
jit_objectcache_misses.log(); jit_objectcache_misses.log();
return NULL; return NULL;
} }
jit_objectcache_hits.log(); jit_objectcache_hits.log();
return mem_buff;
}
void cleanupCacheDirectory() {
// Find all files inside the cache directory, if the number of files is larger than
// MAX_OBJECT_CACHE_ENTRIES,
// sort them by last modification time and remove the oldest excessive ones.
typedef std::pair<std::string, llvm::sys::TimeValue> CacheFileEntry;
std::vector<CacheFileEntry> cache_files;
std::error_code ec;
for (llvm::sys::fs::directory_iterator file(cache_dir.str(), ec), end; !ec && file != end; file.increment(ec)) {
llvm::sys::fs::file_status status;
if (file->status(status))
continue; // ignore files where we can't retrieve the file status.
cache_files.emplace_back(std::make_pair(file->path(), status.getLastModificationTime()));
}
int num_expired = cache_files.size() - MAX_OBJECT_CACHE_ENTRIES;
if (num_expired <= 0)
return;
std::stable_sort(cache_files.begin(), cache_files.end(),
[](const CacheFileEntry& lhs, const CacheFileEntry& rhs) { return lhs.second < rhs.second; });
// MCJIT will want to write into this buffer, and we don't want that for (int i = 0; i < num_expired; ++i)
// because the file has probably just been mmapped. Instead we make llvm::sys::fs::remove(cache_files[i].first);
// a copy. The filed-based buffer will be released when it goes
// out of scope.
return llvm::MemoryBuffer::getMemBufferCopy((*rtn)->getBuffer());
} }
}; };
......
...@@ -50,6 +50,8 @@ int OSR_THRESHOLD_T2 = 10000; ...@@ -50,6 +50,8 @@ int OSR_THRESHOLD_T2 = 10000;
int REOPT_THRESHOLD_T2 = 10000; int REOPT_THRESHOLD_T2 = 10000;
int SPECULATION_THRESHOLD = 100; int SPECULATION_THRESHOLD = 100;
int MAX_OBJECT_CACHE_ENTRIES = 500;
static bool _GLOBAL_ENABLE = 1; static bool _GLOBAL_ENABLE = 1;
bool ENABLE_ICS = 1 && _GLOBAL_ENABLE; bool ENABLE_ICS = 1 && _GLOBAL_ENABLE;
bool ENABLE_ICGENERICS = 1 && ENABLE_ICS; bool ENABLE_ICGENERICS = 1 && ENABLE_ICS;
......
...@@ -35,6 +35,7 @@ extern int OSR_THRESHOLD_INTERPRETER, REOPT_THRESHOLD_INTERPRETER; ...@@ -35,6 +35,7 @@ extern int OSR_THRESHOLD_INTERPRETER, REOPT_THRESHOLD_INTERPRETER;
extern int OSR_THRESHOLD_BASELINE, REOPT_THRESHOLD_BASELINE; extern int OSR_THRESHOLD_BASELINE, REOPT_THRESHOLD_BASELINE;
extern int OSR_THRESHOLD_T2, REOPT_THRESHOLD_T2; extern int OSR_THRESHOLD_T2, REOPT_THRESHOLD_T2;
extern int SPECULATION_THRESHOLD; extern int SPECULATION_THRESHOLD;
extern int MAX_OBJECT_CACHE_ENTRIES;
extern bool SHOW_DISASM, FORCE_INTERPRETER, FORCE_OPTIMIZE, PROFILE, DUMPJIT, TRAP, USE_STRIPPED_STDLIB, extern bool SHOW_DISASM, FORCE_INTERPRETER, FORCE_OPTIMIZE, PROFILE, DUMPJIT, TRAP, USE_STRIPPED_STDLIB,
CONTINUE_AFTER_FATAL, ENABLE_INTERPRETER, ENABLE_PYPA_PARSER, USE_REGALLOC_BASIC, PAUSE_AT_ABORT; CONTINUE_AFTER_FATAL, ENABLE_INTERPRETER, ENABLE_PYPA_PARSER, USE_REGALLOC_BASIC, PAUSE_AT_ABORT;
......
...@@ -9,7 +9,7 @@ add_custom_target(unittests) ...@@ -9,7 +9,7 @@ add_custom_target(unittests)
macro(add_unittest unittest) macro(add_unittest unittest)
add_executable(${unittest}_unittest EXCLUDE_FROM_ALL ${unittest}.cpp $<TARGET_OBJECTS:PYSTON_OBJECTS> $<TARGET_OBJECTS:FROM_CPYTHON>) add_executable(${unittest}_unittest EXCLUDE_FROM_ALL ${unittest}.cpp $<TARGET_OBJECTS:PYSTON_OBJECTS> $<TARGET_OBJECTS:FROM_CPYTHON>)
target_link_libraries(${unittest}_unittest stdlib sqlite3 gmp ssl crypto readline pypa double-conversion unwind gtest gtest_main ${LLVM_LIBS} ${LIBLZMA_LIBRARIES}) target_link_libraries(${unittest}_unittest stdlib sqlite3 gmp ssl crypto readline pypa liblz4 double-conversion unwind gtest gtest_main ${LLVM_LIBS} ${LIBLZMA_LIBRARIES})
add_dependencies(unittests ${unittest}_unittest) add_dependencies(unittests ${unittest}_unittest)
endmacro() endmacro()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment