Commit 5926766a authored by Kevin Modzelewski

Merge pull request #813 from Daetalus/test_hash

Rewrite string hash and enable test_hash
parents 961e615e a13167f9
...@@ -102,6 +102,7 @@ file(GLOB_RECURSE STDPYTHON_SRCS Python ...@@ -102,6 +102,7 @@ file(GLOB_RECURSE STDPYTHON_SRCS Python
mystrtoul.c mystrtoul.c
pyctype.c pyctype.c
pystrtod.c pystrtod.c
random.c
structmember.c structmember.c
) )
......
# expected: fail
# test the invariant that # test the invariant that
# iff a==b then hash(a)==hash(b) # iff a==b then hash(a)==hash(b)
# #
......
#include "Python.h"
#ifdef MS_WINDOWS
#include <windows.h>
#else
#include <fcntl.h>
#endif
#ifdef Py_DEBUG
int _Py_HashSecret_Initialized = 0;
#else
static int _Py_HashSecret_Initialized = 0;
#endif
#ifdef MS_WINDOWS
typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
DWORD dwFlags );
typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
BYTE *pbBuffer );
static CRYPTGENRANDOM pCryptGenRandom = NULL;
/* This handle is never explicitly released. Instead, the operating
system will release it when the process terminates. */
static HCRYPTPROV hCryptProv = 0;
static int
win32_urandom_init(int raise)
{
HINSTANCE hAdvAPI32 = NULL;
CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
/* Obtain handle to the DLL containing CryptoAPI. This should not fail. */
hAdvAPI32 = GetModuleHandle("advapi32.dll");
if(hAdvAPI32 == NULL)
goto error;
/* Obtain pointers to the CryptoAPI functions. This will fail on some early
versions of Win95. */
pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
hAdvAPI32, "CryptAcquireContextA");
if (pCryptAcquireContext == NULL)
goto error;
pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32,
"CryptGenRandom");
if (pCryptGenRandom == NULL)
goto error;
/* Acquire context */
if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
goto error;
return 0;
error:
if (raise)
PyErr_SetFromWindowsErr(0);
else
Py_FatalError("Failed to initialize Windows random API (CryptoGen)");
return -1;
}
/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
   API.  The CryptoAPI context is initialized on first use.

   Return 0 on success, or -1 on error.  On error, a Python exception is set
   from the last Windows error if raise is non-zero; otherwise
   Py_FatalError() is called. */
static int
win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
{
    Py_ssize_t chunk;

    if (hCryptProv == 0)
    {
        if (win32_urandom_init(raise) == -1)
            return -1;
    }

    while (size > 0)
    {
        /* The length parameter of CryptGenRandom() is a DWORD, so hand it
           at most INT_MAX bytes per call and narrow explicitly. */
        chunk = size > INT_MAX ? INT_MAX : size;
        if (!pCryptGenRandom(hCryptProv, (DWORD)chunk, buffer))
        {
            /* CryptGenRandom() failed */
            if (raise)
                PyErr_SetFromWindowsErr(0);
            else
                Py_FatalError("Failed to initialize the randomized hash "
                              "secret using CryptoGen");
            return -1;
        }
        buffer += chunk;
        size -= chunk;
    }
    return 0;
}
#endif /* MS_WINDOWS */
#ifdef __VMS
/* Use openssl random routine */
#include <openssl/rand.h>
/* Fill buffer with size bytes obtained from OpenSSL's RAND_pseudo_bytes().

   Return 0 when RAND_pseudo_bytes() reports a non-negative result.
   Otherwise return -1 after either setting a ValueError (raise non-zero)
   or calling Py_FatalError() (raise zero). */
static int
vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
{
    if (RAND_pseudo_bytes(buffer, size) >= 0)
        return 0;

    if (!raise) {
        Py_FatalError("Failed to initialize the randomized hash "
                      "secret using RAND_pseudo_bytes");
    }
    else {
        PyErr_Format(PyExc_ValueError,
                     "RAND_pseudo_bytes");
    }
    return -1;
}
#endif /* __VMS */
#if !defined(MS_WINDOWS) && !defined(__VMS)
/* Fill buffer with exactly size bytes read from /dev/urandom.
   size must be positive (checked with assert).
   Any failure to open or read the device is reported through
   Py_FatalError(); no Python exception is set. */
static void
dev_urandom_noraise(char *buffer, Py_ssize_t size)
{
    int urandom_fd;
    Py_ssize_t got;

    assert (size > 0);

    urandom_fd = open("/dev/urandom", O_RDONLY);
    if (urandom_fd < 0)
        Py_FatalError("Failed to open /dev/urandom");

    while (size > 0)
    {
        got = read(urandom_fd, buffer, (size_t)size);
        if (got < 0 && errno == EINTR)
            continue;           /* read() reported EINTR: try again */
        if (got <= 0)
        {
            /* stop on error or if read(size) returned 0 */
            Py_FatalError("Failed to read bytes from /dev/urandom");
            break;
        }
        /* Partial read: advance past what was delivered. */
        buffer += got;
        size -= (Py_ssize_t)got;
    }

    close(urandom_fd);
}
/* Fill buffer with size bytes read from /dev/urandom.

   A non-positive size is a no-op that returns 0.  The open() and read()
   calls are bracketed by Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS.

   Return 0 on success.  On error return -1 with a Python exception set:
   NotImplementedError if the device cannot be opened because it is missing
   or inaccessible (ENOENT, ENXIO, ENODEV, EACCES), OSError for any other
   open() or read() failure, RuntimeError if read() returned 0 before the
   buffer was filled. */
static int
dev_urandom_python(char *buffer, Py_ssize_t size)
{
    int urandom_fd;
    int status = 0;
    int device_missing;
    Py_ssize_t got;

    if (size <= 0)
        return 0;

    Py_BEGIN_ALLOW_THREADS
    urandom_fd = open("/dev/urandom", O_RDONLY);
    Py_END_ALLOW_THREADS
    if (urandom_fd < 0)
    {
        device_missing = (errno == ENOENT || errno == ENXIO ||
                          errno == ENODEV || errno == EACCES);
        if (!device_missing)
            PyErr_SetFromErrno(PyExc_OSError);
        else
            PyErr_SetString(PyExc_NotImplementedError,
                            "/dev/urandom (or equivalent) not found");
        return -1;
    }

    Py_BEGIN_ALLOW_THREADS
    for (;;) {
        got = read(urandom_fd, buffer, (size_t)size);
        if (got < 0 && errno == EINTR)
            continue;           /* read() reported EINTR: try again */
        if (got <= 0)
            break;              /* error or unexpected end of data */
        buffer += got;
        size -= (Py_ssize_t)got;
        if (size <= 0)
            break;              /* buffer completely filled */
    }
    Py_END_ALLOW_THREADS

    if (got <= 0)
    {
        /* stop on error or if read(size) returned 0 */
        if (got == 0)
            PyErr_Format(PyExc_RuntimeError,
                         "Failed to read %zi bytes from /dev/urandom",
                         size);
        else
            PyErr_SetFromErrno(PyExc_OSError);
        status = -1;
    }
    close(urandom_fd);
    return status;
}
#endif /* !defined(MS_WINDOWS) && !defined(__VMS) */
/* Deterministically fill buffer with size bytes from a linear congruential
   generator seeded with x0:

       x(n+1) = x(n) * 214013 + 2531011

   The arithmetic is done in unsigned int, so it wraps modulo
   2 ^ (8 * sizeof(int)).  Each output byte is bits 23..16 of the freshly
   advanced state.  Nothing is written when size is 0. */
static void
lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
{
    unsigned int state = x0;
    unsigned char *out = buffer;
    size_t remaining = size;

    while (remaining > 0) {
        /* Advance the generator, then emit one byte of its state. */
        state = state * 214013 + 2531011;
        *out++ = (state >> 16) & 0xff;
        remaining--;
    }
}
/* Initialize the global hash secret _Py_HashSecret.

   Only the first call does any work; later calls return immediately
   because _Py_HashSecret_Initialized is already set.

   - If Py_HashRandomizationFlag is zero, the secret is zeroed.
   - If PYTHONHASHSEED is set to a decimal integer in [0; 4294967295],
     the secret is zeroed for 0 and otherwise derived deterministically
     from the seed with lcg_urandom().  Any other value except "random"
     is a fatal error.
   - Otherwise (variable unset, empty, or "random") the secret is filled
     from the platform random source. */
void
_PyRandom_Init(void)
{
    char *env;
    void *secret = &_Py_HashSecret;
    Py_ssize_t secret_size = sizeof(_Py_HashSecret_t);

    if (_Py_HashSecret_Initialized)
        return;
    _Py_HashSecret_Initialized = 1;

    /*
      By default, hash randomization is disabled, and only
      enabled if PYTHONHASHSEED is set to non-empty or if
      "-R" is provided at the command line:
    */
    if (!Py_HashRandomizationFlag) {
        /* Disable the randomized hash: */
        memset(secret, 0, secret_size);
        return;
    }

    /*
      Hash randomization is enabled.  Generate a per-process secret,
      using PYTHONHASHSEED if provided.
    */
    env = Py_GETENV("PYTHONHASHSEED");
    if (env && *env != '\0' && strcmp(env, "random") != 0) {
        char *endptr = env;
        unsigned long seed;

        /* strtoul() only sets errno on failure, so clear it first;
           otherwise a stale ERANGE left by an earlier call could make
           the valid seed ULONG_MAX look like an overflow. */
        errno = 0;
        seed = strtoul(env, &endptr, 10);
        if (*endptr != '\0'
            || seed > 4294967295UL
            || (errno == ERANGE && seed == ULONG_MAX))
        {
            Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer "
                          "in range [0; 4294967295]");
        }
        if (seed == 0) {
            /* disable the randomized hash */
            memset(secret, 0, secret_size);
        }
        else {
            /* seed was range-checked above, so the narrowing is lossless
               when unsigned int is at least 32 bits wide. */
            lcg_urandom((unsigned int)seed, (unsigned char*)secret,
                        secret_size);
        }
    }
    else {
        /* No usable seed: use the platform random source.  Each helper is
           called in its "no raise" mode, which reports failure through
           Py_FatalError(), so the return value is not inspected here. */
#ifdef MS_WINDOWS
        (void)win32_urandom((unsigned char *)secret, secret_size, 0);
#else /* #ifdef MS_WINDOWS */
# ifdef __VMS
        (void)vms_urandom((unsigned char *)secret, secret_size, 0);
# else
        dev_urandom_noraise((char*)secret, secret_size);
# endif
#endif
    }
}
...@@ -301,7 +301,7 @@ static int main(int argc, char** argv) { ...@@ -301,7 +301,7 @@ static int main(int argc, char** argv) {
// Suppress getopt errors so we can throw them ourselves // Suppress getopt errors so we can throw them ourselves
opterr = 0; opterr = 0;
while ((code = getopt(argc, argv, "+:OqdIibpjtrsSvnxEac:FuPTGm:")) != -1) { while ((code = getopt(argc, argv, "+:OqdIibpjtrsRSvnxEac:FuPTGm:")) != -1) {
if (code == 'c') { if (code == 'c') {
assert(optarg); assert(optarg);
command = optarg; command = optarg;
...@@ -312,6 +312,9 @@ static int main(int argc, char** argv) { ...@@ -312,6 +312,9 @@ static int main(int argc, char** argv) {
module = optarg; module = optarg;
// no more option parsing; the rest of our arguments go into sys.argv. // no more option parsing; the rest of our arguments go into sys.argv.
break; break;
} else if (code == 'R') {
Py_HashRandomizationFlag = 1;
break;
} else if (code == ':') { } else if (code == ':') {
fprintf(stderr, "Argument expected for the -%c option\n", optopt); fprintf(stderr, "Argument expected for the -%c option\n", optopt);
return 2; return 2;
...@@ -324,7 +327,13 @@ static int main(int argc, char** argv) { ...@@ -324,7 +327,13 @@ static int main(int argc, char** argv) {
return r; return r;
} }
} }
/* The variable is only tested for existence here; _PyRandom_Init will
check its value further. */
char* p;
if (!Py_HashRandomizationFlag && (p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0')
Py_HashRandomizationFlag = 1;
_PyRandom_Init();
Stats::startEstimatingCPUFreq(); Stats::startEstimatingCPUFreq();
const char* fn = NULL; const char* fn = NULL;
......
...@@ -39,6 +39,7 @@ BoxedDict* sys_modules_dict; ...@@ -39,6 +39,7 @@ BoxedDict* sys_modules_dict;
extern "C" { extern "C" {
// supposed to be exposed through sys.flags // supposed to be exposed through sys.flags
int Py_BytesWarningFlag = 0; int Py_BytesWarningFlag = 0;
int Py_HashRandomizationFlag = 0;
} }
Box* sysExcInfo() { Box* sysExcInfo() {
......
...@@ -130,9 +130,8 @@ size_t PyHasher::operator()(Box* b) const { ...@@ -130,9 +130,8 @@ size_t PyHasher::operator()(Box* b) const {
ScopedStatTimer _st(pyhasher_timer_counter, 10); ScopedStatTimer _st(pyhasher_timer_counter, 10);
#endif #endif
if (b->cls == str_cls) { if (b->cls == str_cls) {
StringHash<char> H;
auto s = static_cast<BoxedString*>(b); auto s = static_cast<BoxedString*>(b);
return H(s->data(), s->size()); return strHashUnboxed(s);
} }
return hashUnboxed(b); return hashUnboxed(b);
......
...@@ -1522,28 +1522,69 @@ failed: ...@@ -1522,28 +1522,69 @@ failed:
} }
extern "C" size_t unicodeHashUnboxed(PyUnicodeObject* self) { extern "C" size_t unicodeHashUnboxed(PyUnicodeObject* self) {
Py_ssize_t len;
Py_UNICODE* p;
long x;
#ifdef Py_DEBUG
assert(_Py_HashSecret_Initialized);
#endif
if (self->hash != -1) if (self->hash != -1)
return self->hash; return self->hash;
len = PyUnicode_GET_SIZE(self);
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
*/
if (len == 0) {
self->hash = 0;
return 0;
}
p = PyUnicode_AS_UNICODE(self);
x = _Py_HashSecret.prefix;
x ^= *p << 7;
while (--len >= 0)
x = (1000003 * x) ^ *p++;
x ^= PyUnicode_GET_SIZE(self);
x ^= _Py_HashSecret.suffix;
if (x == -1)
x = -2;
self->hash = x;
return x;
}
Py_ssize_t len = PyUnicode_GET_SIZE(self); extern "C" size_t strHashUnboxed(BoxedString* self) {
assert(PyString_Check(self));
const char* p;
long x;
#ifdef Py_DEBUG
assert(_Py_HashSecret_Initialized);
#endif
if (len == 0) long len = Py_SIZE(self);
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
*/
if (len == 0) {
return 0; return 0;
}
p = self->s().data();
x = _Py_HashSecret.prefix;
x ^= *p << 7;
while (--len >= 0)
x = (1000003 * x) ^ *p++;
x ^= Py_SIZE(self);
x ^= _Py_HashSecret.suffix;
if (x == -1)
x = -2;
Py_UNICODE* p = PyUnicode_AS_UNICODE(self); return x;
pyston::StringHash<Py_UNICODE> H;
return H(p, len);
} }
extern "C" Box* strHash(BoxedString* self) { extern "C" Box* strHash(BoxedString* self) {
assert(PyString_Check(self)); return boxLong(strHashUnboxed(self));
// CPython set the hash empty string to 0 manually
if (self->size() == 0)
return boxInt(0);
StringHash<char> H;
return boxInt(H(self->data(), self->size()));
} }
extern "C" Box* strNonzero(BoxedString* self) { extern "C" Box* strNonzero(BoxedString* self) {
...@@ -2714,7 +2755,7 @@ void setupStr() { ...@@ -2714,7 +2755,7 @@ void setupStr() {
str_cls->giveAttr("__len__", new BoxedFunction(boxRTFunction((void*)strLen, BOXED_INT, 1))); str_cls->giveAttr("__len__", new BoxedFunction(boxRTFunction((void*)strLen, BOXED_INT, 1)));
str_cls->giveAttr("__str__", new BoxedFunction(boxRTFunction((void*)strStr, STR, 1))); str_cls->giveAttr("__str__", new BoxedFunction(boxRTFunction((void*)strStr, STR, 1)));
str_cls->giveAttr("__repr__", new BoxedFunction(boxRTFunction((void*)strRepr, STR, 1))); str_cls->giveAttr("__repr__", new BoxedFunction(boxRTFunction((void*)strRepr, STR, 1)));
str_cls->giveAttr("__hash__", new BoxedFunction(boxRTFunction((void*)strHash, BOXED_INT, 1))); str_cls->giveAttr("__hash__", new BoxedFunction(boxRTFunction((void*)strHash, UNKNOWN, 1)));
str_cls->giveAttr("__nonzero__", new BoxedFunction(boxRTFunction((void*)strNonzero, BOXED_BOOL, 1))); str_cls->giveAttr("__nonzero__", new BoxedFunction(boxRTFunction((void*)strNonzero, BOXED_BOOL, 1)));
str_cls->giveAttr("isalnum", new BoxedFunction(boxRTFunction((void*)strIsAlnum, BOXED_BOOL, 1))); str_cls->giveAttr("isalnum", new BoxedFunction(boxRTFunction((void*)strIsAlnum, BOXED_BOOL, 1)));
......
...@@ -430,36 +430,7 @@ private: ...@@ -430,36 +430,7 @@ private:
friend void setupRuntime(); friend void setupRuntime();
}; };
template <typename T> struct StringHash { extern "C" size_t strHashUnboxed(BoxedString* self);
size_t operator()(const T* str) {
size_t hash = 5381;
T c;
while ((c = *str++))
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
return hash;
}
size_t operator()(const T* str, int len) {
size_t hash = 5381;
T c;
while (--len >= 0) {
c = *str++;
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
}
return hash;
}
};
template <> struct StringHash<std::string> {
size_t operator()(const std::string& str) {
StringHash<char> H;
return H(&str[0], str.size());
}
};
class BoxedInstanceMethod : public Box { class BoxedInstanceMethod : public Box {
public: public:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment