Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
Pyston
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
Pyston
Commits
5926766a
Commit
5926766a
authored
Aug 10, 2015
by
Kevin Modzelewski
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #813 from Daetalus/test_hash
Rewrite string hash and enable test_hash
parents
961e615e
a13167f9
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
354 additions
and
48 deletions
+354
-48
from_cpython/CMakeLists.txt
from_cpython/CMakeLists.txt
+1
-0
from_cpython/Lib/test/test_hash.py
from_cpython/Lib/test/test_hash.py
+0
-1
from_cpython/Python/random.c
from_cpython/Python/random.c
+285
-0
src/jit.cpp
src/jit.cpp
+10
-1
src/runtime/builtin_modules/sys.cpp
src/runtime/builtin_modules/sys.cpp
+1
-0
src/runtime/objmodel.cpp
src/runtime/objmodel.cpp
+1
-2
src/runtime/str.cpp
src/runtime/str.cpp
+55
-14
src/runtime/types.h
src/runtime/types.h
+1
-30
No files found.
from_cpython/CMakeLists.txt
View file @
5926766a
...
@@ -102,6 +102,7 @@ file(GLOB_RECURSE STDPYTHON_SRCS Python
...
@@ -102,6 +102,7 @@ file(GLOB_RECURSE STDPYTHON_SRCS Python
mystrtoul.c
mystrtoul.c
pyctype.c
pyctype.c
pystrtod.c
pystrtod.c
random.c
structmember.c
structmember.c
)
)
...
...
from_cpython/Lib/test/test_hash.py
View file @
5926766a
# expected: fail
# test the invariant that
# test the invariant that
# iff a==b then hash(a)==hash(b)
# iff a==b then hash(a)==hash(b)
#
#
...
...
from_cpython/Python/random.c
0 → 100644
View file @
5926766a
#include "Python.h"
#ifdef MS_WINDOWS
#include <windows.h>
#else
#include <fcntl.h>
#endif
/* Guard flag for _PyRandom_Init(): it starts at 0 and is set to 1 the first
   time _PyRandom_Init() runs, so that later calls return immediately.
   The flag has external linkage only when Py_DEBUG is defined; in all other
   builds it is private to this file. */
#ifdef Py_DEBUG
int
_Py_HashSecret_Initialized
=
0
;
#else
static
int
_Py_HashSecret_Initialized
=
0
;
#endif
#ifdef MS_WINDOWS
/* Function-pointer type used for the "CryptAcquireContextA" entry point that
   win32_urandom_init() looks up in advapi32.dll with GetProcAddress(). */
typedef
BOOL
(
WINAPI
*
CRYPTACQUIRECONTEXTA
)(
HCRYPTPROV
*
phProv
,
\
LPCSTR
pszContainer
,
LPCSTR
pszProvider
,
DWORD
dwProvType
,
\
DWORD
dwFlags
);
/* Function-pointer type used for the "CryptGenRandom" entry point: fills
   pbBuffer with dwLen bytes using the provider handle hProv. */
typedef
BOOL
(
WINAPI
*
CRYPTGENRANDOM
)(
HCRYPTPROV
hProv
,
DWORD
dwLen
,
\
BYTE
*
pbBuffer
);
/* Resolved by win32_urandom_init(); stays NULL until that lookup succeeds. */
static
CRYPTGENRANDOM
pCryptGenRandom
=
NULL
;
/* Provider handle filled in by win32_urandom_init().  A value of 0 is what
   win32_urandom() tests to decide whether initialization is still needed.
   This handle is never explicitly released. Instead, the operating
   system will release it when the process terminates. */
static
HCRYPTPROV
hCryptProv
=
0
;
static
int
win32_urandom_init
(
int
raise
)
{
HINSTANCE
hAdvAPI32
=
NULL
;
CRYPTACQUIRECONTEXTA
pCryptAcquireContext
=
NULL
;
/* Obtain handle to the DLL containing CryptoAPI. This should not fail. */
hAdvAPI32
=
GetModuleHandle
(
"advapi32.dll"
);
if
(
hAdvAPI32
==
NULL
)
goto
error
;
/* Obtain pointers to the CryptoAPI functions. This will fail on some early
versions of Win95. */
pCryptAcquireContext
=
(
CRYPTACQUIRECONTEXTA
)
GetProcAddress
(
hAdvAPI32
,
"CryptAcquireContextA"
);
if
(
pCryptAcquireContext
==
NULL
)
goto
error
;
pCryptGenRandom
=
(
CRYPTGENRANDOM
)
GetProcAddress
(
hAdvAPI32
,
"CryptGenRandom"
);
if
(
pCryptGenRandom
==
NULL
)
goto
error
;
/* Acquire context */
if
(
!
pCryptAcquireContext
(
&
hCryptProv
,
NULL
,
NULL
,
PROV_RSA_FULL
,
CRYPT_VERIFYCONTEXT
))
goto
error
;
return
0
;
error:
if
(
raise
)
PyErr_SetFromWindowsErr
(
0
);
else
Py_FatalError
(
"Failed to initialize Windows random API (CryptoGen)"
);
return
-
1
;
}
/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
   API.

   The provider is initialized on first use through win32_urandom_init().
   The request is issued in chunks of at most INT_MAX bytes because the
   underlying call takes a DWORD length.

   Return 0 on success, or -1 on error.  On error, if `raise` is non-zero a
   Python exception is set from the last Windows error; otherwise
   Py_FatalError() is called. */
static int
win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
{
    Py_ssize_t chunk;

    if (hCryptProv == 0) {
        if (win32_urandom_init(raise) == -1)
            return -1;
    }

    while (size > 0) {
        chunk = size > INT_MAX ? INT_MAX : size;
        /* chunk is bounded by INT_MAX above, so the narrowing is safe. */
        if (!pCryptGenRandom(hCryptProv, (DWORD)chunk, buffer)) {
            /* CryptGenRandom() failed */
            if (raise)
                PyErr_SetFromWindowsErr(0);
            else
                Py_FatalError("Failed to initialize the randomized hash "
                              "secret using CryptoGen");
            return -1;
        }
        buffer += chunk;
        size -= chunk;
    }
    return 0;
}
#endif
/* MS_WINDOWS */
#ifdef __VMS
/* Use openssl random routine */
#include <openssl/rand.h>
/* Fill buffer with size bytes obtained from OpenSSL's RAND_pseudo_bytes().

   Return 0 on success.  When RAND_pseudo_bytes() reports a negative result,
   return -1 after either setting a ValueError (`raise` non-zero) or calling
   Py_FatalError() (`raise` zero). */
static int
vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
{
    if (RAND_pseudo_bytes(buffer, size) >= 0)
        return 0;

    if (!raise) {
        Py_FatalError("Failed to initialize the randomized hash "
                      "secret using RAND_pseudo_bytes");
    }
    else {
        PyErr_Format(PyExc_ValueError,
                     "RAND_pseudo_bytes");
    }
    return -1;
}
#endif
/* __VMS */
#if !defined(MS_WINDOWS) && !defined(__VMS)
/* Fill buffer with exactly size bytes read from /dev/urandom.

   size must be positive (asserted).  No error is ever reported to the
   caller: failing to open the device, a read error, or a read that returns
   0 bytes all end in Py_FatalError(). */
static void
dev_urandom_noraise(char *buffer, Py_ssize_t size)
{
    int fd;
    Py_ssize_t got;

    assert(0 < size);

    fd = open("/dev/urandom", O_RDONLY);
    if (fd < 0)
        Py_FatalError("Failed to open /dev/urandom");

    /* Each pass consumes `got` bytes; short reads simply loop again. */
    for (; size > 0; buffer += got, size -= (Py_ssize_t)got) {
        /* Restart reads that were only interrupted (EINTR). */
        for (;;) {
            got = read(fd, buffer, (size_t)size);
            if (got >= 0 || errno != EINTR)
                break;
        }

        if (got <= 0) {
            /* stop on error or if read(size) returned 0 */
            Py_FatalError("Failed to read bytes from /dev/urandom");
            break;
        }
    }
    close(fd);
}
/* Fill buffer with size bytes read from /dev/urandom, reporting problems as
   Python exceptions instead of aborting.

   A size of zero or less is a no-op that returns 0.
   Return 0 on success.  Return -1 with an exception set on failure:
     - NotImplementedError if the device cannot be opened because of
       ENOENT, ENXIO, ENODEV or EACCES;
     - OSError (from errno) for any other open() failure or a read() error;
     - RuntimeError if read() returns 0 before the buffer is full.
   The open() call and the read loop are wrapped in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
static int
dev_urandom_python(char *buffer, Py_ssize_t size)
{
    int fd;
    Py_ssize_t n;

    if (size <= 0)
        return 0;

    Py_BEGIN_ALLOW_THREADS
    fd = open("/dev/urandom", O_RDONLY);
    Py_END_ALLOW_THREADS

    if (fd < 0) {
        switch (errno) {
        case ENOENT:
        case ENXIO:
        case ENODEV:
        case EACCES:
            PyErr_SetString(PyExc_NotImplementedError,
                            "/dev/urandom (or equivalent) not found");
            break;
        default:
            PyErr_SetFromErrno(PyExc_OSError);
            break;
        }
        return -1;
    }

    Py_BEGIN_ALLOW_THREADS
    for (;;) {
        /* Restart reads that were only interrupted (EINTR). */
        do {
            n = read(fd, buffer, (size_t)size);
        } while (n < 0 && errno == EINTR);

        /* stop on error or if read(size) returned 0 */
        if (n <= 0)
            break;

        buffer += n;
        size -= (Py_ssize_t)n;
        if (size <= 0)
            break;
    }
    Py_END_ALLOW_THREADS

    if (n > 0) {
        close(fd);
        return 0;
    }

    /* The exception is set before close() so errno still belongs to read(). */
    if (n < 0)
        PyErr_SetFromErrno(PyExc_OSError);
    else
        PyErr_Format(PyExc_RuntimeError,
                     "Failed to read %zi bytes from /dev/urandom",
                     size);
    close(fd);
    return -1;
}
#endif
/* !defined(MS_WINDOWS) && !defined(__VMS) */
/* Deterministically fill buffer with size bytes from a linear congruential
   generator seeded with x0:

       x(n+1) = x(n) * 214013 + 2531011

   The arithmetic is done in `unsigned int`, so it wraps modulo
   2 ^ (8 * sizeof(int)).  Each output byte is bits 23..16 of the freshly
   advanced state.  A size of 0 leaves buffer untouched. */
static void
lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
{
    unsigned int state = x0;
    unsigned char *out = buffer;
    unsigned char *const end = buffer + size;

    while (out != end) {
        /* Advance the generator, then emit one byte of its state. */
        state = state * 214013u + 2531011u;
        *out++ = (unsigned char)((state >> 16) & 0xffu);
    }
}
/* Initialize the process-wide hash secret (_Py_HashSecret) exactly once.

   Behaviour, in order:
     - If the secret was already initialized, do nothing.
     - If Py_HashRandomizationFlag is zero, the secret is zero-filled, which
       disables the randomized hash.
     - If PYTHONHASHSEED is set, non-empty and not "random", it must be a
       decimal integer in [0; 4294967295]; anything else is a fatal error.
       A seed of 0 zero-fills the secret; any other seed fills it
       deterministically through lcg_urandom().
     - Otherwise the secret is filled from the platform random source
       (CryptoGen on Windows, RAND_pseudo_bytes on VMS, /dev/urandom
       elsewhere); those helpers call Py_FatalError() on failure here. */
void
_PyRandom_Init(void)
{
    char *env;
    void *secret = &_Py_HashSecret;
    Py_ssize_t secret_size = sizeof(_Py_HashSecret_t);

    if (_Py_HashSecret_Initialized)
        return;
    _Py_HashSecret_Initialized = 1;

    /*
      By default, hash randomization is disabled, and only
      enabled if PYTHONHASHSEED is set to non-empty or if
      "-R" is provided at the command line:
    */
    if (!Py_HashRandomizationFlag) {
        /* Disable the randomized hash: */
        memset(secret, 0, secret_size);
        return;
    }

    /*
      Hash randomization is enabled.  Generate a per-process secret,
      using PYTHONHASHSEED if provided.
    */
    env = Py_GETENV("PYTHONHASHSEED");
    if (env && *env != '\0' && strcmp(env, "random") != 0) {
        char *endptr = env;
        unsigned long seed;

        /* strtoul() only sets errno on failure and never clears it, so a
           stale ERANGE left by earlier code would otherwise make a valid
           seed equal to ULONG_MAX (possible where unsigned long is 32 bits)
           look like an overflow. */
        errno = 0;
        seed = strtoul(env, &endptr, 10);
        if (*endptr != '\0'
            || seed > 4294967295UL
            || (errno == ERANGE && seed == ULONG_MAX))
        {
            Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer "
                          "in range [0; 4294967295]");
        }
        if (seed == 0) {
            /* disable the randomized hash */
            memset(secret, 0, secret_size);
        }
        else {
            lcg_urandom(seed, (unsigned char *)secret, secret_size);
        }
    }
    else {
        /* raise == 0 below: failures are fatal, so return values carry no
           extra information and are deliberately ignored. */
#ifdef MS_WINDOWS
        (void)win32_urandom((unsigned char *)secret, secret_size, 0);
#else /* #ifdef MS_WINDOWS */
# ifdef __VMS
        (void)vms_urandom((unsigned char *)secret, secret_size, 0);
# else
        dev_urandom_noraise((char *)secret, secret_size);
# endif
#endif
    }
}
src/jit.cpp
View file @
5926766a
...
@@ -301,7 +301,7 @@ static int main(int argc, char** argv) {
...
@@ -301,7 +301,7 @@ static int main(int argc, char** argv) {
// Suppress getopt errors so we can throw them ourselves
// Suppress getopt errors so we can throw them ourselves
opterr
=
0
;
opterr
=
0
;
while
((
code
=
getopt
(
argc
,
argv
,
"+:OqdIibpjtrsSvnxEac:FuPTGm:"
))
!=
-
1
)
{
while
((
code
=
getopt
(
argc
,
argv
,
"+:OqdIibpjtrs
R
SvnxEac:FuPTGm:"
))
!=
-
1
)
{
if
(
code
==
'c'
)
{
if
(
code
==
'c'
)
{
assert
(
optarg
);
assert
(
optarg
);
command
=
optarg
;
command
=
optarg
;
...
@@ -312,6 +312,9 @@ static int main(int argc, char** argv) {
...
@@ -312,6 +312,9 @@ static int main(int argc, char** argv) {
module
=
optarg
;
module
=
optarg
;
// no more option parsing; the rest of our arguments go into sys.argv.
// no more option parsing; the rest of our arguments go into sys.argv.
break
;
break
;
}
else
if
(
code
==
'R'
)
{
Py_HashRandomizationFlag
=
1
;
break
;
}
else
if
(
code
==
':'
)
{
}
else
if
(
code
==
':'
)
{
fprintf
(
stderr
,
"Argument expected for the -%c option
\n
"
,
optopt
);
fprintf
(
stderr
,
"Argument expected for the -%c option
\n
"
,
optopt
);
return
2
;
return
2
;
...
@@ -324,7 +327,13 @@ static int main(int argc, char** argv) {
...
@@ -324,7 +327,13 @@ static int main(int argc, char** argv) {
return
r
;
return
r
;
}
}
}
}
/* The variable is only tested for existence here; _PyRandom_Init will
check its value further. */
char
*
p
;
if
(
!
Py_HashRandomizationFlag
&&
(
p
=
Py_GETENV
(
"PYTHONHASHSEED"
))
&&
*
p
!=
'\0'
)
Py_HashRandomizationFlag
=
1
;
_PyRandom_Init
();
Stats
::
startEstimatingCPUFreq
();
Stats
::
startEstimatingCPUFreq
();
const
char
*
fn
=
NULL
;
const
char
*
fn
=
NULL
;
...
...
src/runtime/builtin_modules/sys.cpp
View file @
5926766a
...
@@ -39,6 +39,7 @@ BoxedDict* sys_modules_dict;
...
@@ -39,6 +39,7 @@ BoxedDict* sys_modules_dict;
extern
"C"
{
extern
"C"
{
// supposed to be exposed through sys.flags
// supposed to be exposed through sys.flags
int
Py_BytesWarningFlag
=
0
;
int
Py_BytesWarningFlag
=
0
;
int
Py_HashRandomizationFlag
=
0
;
}
}
Box
*
sysExcInfo
()
{
Box
*
sysExcInfo
()
{
...
...
src/runtime/objmodel.cpp
View file @
5926766a
...
@@ -130,9 +130,8 @@ size_t PyHasher::operator()(Box* b) const {
...
@@ -130,9 +130,8 @@ size_t PyHasher::operator()(Box* b) const {
ScopedStatTimer
_st
(
pyhasher_timer_counter
,
10
);
ScopedStatTimer
_st
(
pyhasher_timer_counter
,
10
);
#endif
#endif
if
(
b
->
cls
==
str_cls
)
{
if
(
b
->
cls
==
str_cls
)
{
StringHash
<
char
>
H
;
auto
s
=
static_cast
<
BoxedString
*>
(
b
);
auto
s
=
static_cast
<
BoxedString
*>
(
b
);
return
H
(
s
->
data
(),
s
->
size
()
);
return
strHashUnboxed
(
s
);
}
}
return
hashUnboxed
(
b
);
return
hashUnboxed
(
b
);
...
...
src/runtime/str.cpp
View file @
5926766a
...
@@ -1522,28 +1522,69 @@ failed:
...
@@ -1522,28 +1522,69 @@ failed:
}
}
extern
"C"
size_t
unicodeHashUnboxed
(
PyUnicodeObject
*
self
)
{
extern
"C"
size_t
unicodeHashUnboxed
(
PyUnicodeObject
*
self
)
{
Py_ssize_t
len
;
Py_UNICODE
*
p
;
long
x
;
#ifdef Py_DEBUG
assert
(
_Py_HashSecret_Initialized
);
#endif
if
(
self
->
hash
!=
-
1
)
if
(
self
->
hash
!=
-
1
)
return
self
->
hash
;
return
self
->
hash
;
len
=
PyUnicode_GET_SIZE
(
self
);
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
*/
if
(
len
==
0
)
{
self
->
hash
=
0
;
return
0
;
}
p
=
PyUnicode_AS_UNICODE
(
self
);
x
=
_Py_HashSecret
.
prefix
;
x
^=
*
p
<<
7
;
while
(
--
len
>=
0
)
x
=
(
1000003
*
x
)
^
*
p
++
;
x
^=
PyUnicode_GET_SIZE
(
self
);
x
^=
_Py_HashSecret
.
suffix
;
if
(
x
==
-
1
)
x
=
-
2
;
self
->
hash
=
x
;
return
x
;
}
Py_ssize_t
len
=
PyUnicode_GET_SIZE
(
self
);
extern
"C"
size_t
strHashUnboxed
(
BoxedString
*
self
)
{
assert
(
PyString_Check
(
self
));
const
char
*
p
;
long
x
;
#ifdef Py_DEBUG
assert
(
_Py_HashSecret_Initialized
);
#endif
if
(
len
==
0
)
long
len
=
Py_SIZE
(
self
);
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
*/
if
(
len
==
0
)
{
return
0
;
return
0
;
}
p
=
self
->
s
().
data
();
x
=
_Py_HashSecret
.
prefix
;
x
^=
*
p
<<
7
;
while
(
--
len
>=
0
)
x
=
(
1000003
*
x
)
^
*
p
++
;
x
^=
Py_SIZE
(
self
);
x
^=
_Py_HashSecret
.
suffix
;
if
(
x
==
-
1
)
x
=
-
2
;
Py_UNICODE
*
p
=
PyUnicode_AS_UNICODE
(
self
);
return
x
;
pyston
::
StringHash
<
Py_UNICODE
>
H
;
return
H
(
p
,
len
);
}
}
extern
"C"
Box
*
strHash
(
BoxedString
*
self
)
{
extern
"C"
Box
*
strHash
(
BoxedString
*
self
)
{
assert
(
PyString_Check
(
self
));
return
boxLong
(
strHashUnboxed
(
self
));
// CPython set the hash empty string to 0 manually
if
(
self
->
size
()
==
0
)
return
boxInt
(
0
);
StringHash
<
char
>
H
;
return
boxInt
(
H
(
self
->
data
(),
self
->
size
()));
}
}
extern
"C"
Box
*
strNonzero
(
BoxedString
*
self
)
{
extern
"C"
Box
*
strNonzero
(
BoxedString
*
self
)
{
...
@@ -2714,7 +2755,7 @@ void setupStr() {
...
@@ -2714,7 +2755,7 @@ void setupStr() {
str_cls
->
giveAttr
(
"__len__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strLen
,
BOXED_INT
,
1
)));
str_cls
->
giveAttr
(
"__len__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strLen
,
BOXED_INT
,
1
)));
str_cls
->
giveAttr
(
"__str__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strStr
,
STR
,
1
)));
str_cls
->
giveAttr
(
"__str__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strStr
,
STR
,
1
)));
str_cls
->
giveAttr
(
"__repr__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strRepr
,
STR
,
1
)));
str_cls
->
giveAttr
(
"__repr__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strRepr
,
STR
,
1
)));
str_cls
->
giveAttr
(
"__hash__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strHash
,
BOXED_INT
,
1
)));
str_cls
->
giveAttr
(
"__hash__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strHash
,
UNKNOWN
,
1
)));
str_cls
->
giveAttr
(
"__nonzero__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strNonzero
,
BOXED_BOOL
,
1
)));
str_cls
->
giveAttr
(
"__nonzero__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strNonzero
,
BOXED_BOOL
,
1
)));
str_cls
->
giveAttr
(
"isalnum"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strIsAlnum
,
BOXED_BOOL
,
1
)));
str_cls
->
giveAttr
(
"isalnum"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strIsAlnum
,
BOXED_BOOL
,
1
)));
...
...
src/runtime/types.h
View file @
5926766a
...
@@ -430,36 +430,7 @@ private:
...
@@ -430,36 +430,7 @@ private:
friend
void
setupRuntime
();
friend
void
setupRuntime
();
};
};
template
<
typename
T
>
struct
StringHash
{
extern
"C"
size_t
strHashUnboxed
(
BoxedString
*
self
);
size_t
operator
()(
const
T
*
str
)
{
size_t
hash
=
5381
;
T
c
;
while
((
c
=
*
str
++
))
hash
=
((
hash
<<
5
)
+
hash
)
+
c
;
/* hash * 33 + c */
return
hash
;
}
size_t
operator
()(
const
T
*
str
,
int
len
)
{
size_t
hash
=
5381
;
T
c
;
while
(
--
len
>=
0
)
{
c
=
*
str
++
;
hash
=
((
hash
<<
5
)
+
hash
)
+
c
;
/* hash * 33 + c */
}
return
hash
;
}
};
template
<
>
struct
StringHash
<
std
::
string
>
{
size_t
operator
()(
const
std
::
string
&
str
)
{
StringHash
<
char
>
H
;
return
H
(
&
str
[
0
],
str
.
size
());
}
};
class
BoxedInstanceMethod
:
public
Box
{
class
BoxedInstanceMethod
:
public
Box
{
public:
public:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment