Commit 7eec7fa2 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #886 from kmod/perf2

Lower initial dict/set size from 64->8
parents 5b0b1fe8 46101afb
......@@ -300,7 +300,7 @@ endif()
# format
file(GLOB_RECURSE FORMAT_FILES ${CMAKE_SOURCE_DIR}/src/*.h ${CMAKE_SOURCE_DIR}/src/*.cpp)
add_custom_target(format ${LLVM_TOOLS_BINARY_DIR}/clang-format -style=file -i ${FORMAT_FILES} DEPENDS clang-format WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src)
add_custom_target(format ${CMAKE_SOURCE_DIR}/tools/do_format.sh ${LLVM_TOOLS_BINARY_DIR}/clang-format DEPENDS clang-format WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src)
add_custom_target(check-format ${CMAKE_SOURCE_DIR}/tools/check_format.sh ${LLVM_TOOLS_BINARY_DIR}/clang-format DEPENDS clang-format WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src)
# lint
......
# From: https://mail.python.org/pipermail/pypy-dev/2014-August/012695.html
# Board geometry: an L x L board whose squares are addressed as (i, j) tuples.
L = 10
xrows = range(L)
xcols = range(L)
# one zero-initialised slot per square (L * L entries)
bitmap = [0] * L ** 2
# every board position, ordered by i first and then by j
poss = [(i, j) for i in xrows for j in xcols]
# lookup tables between a position and its index in poss (filled in just below)
idx_to_pos = dict()
pos_to_idx = dict()
for i, pos in enumerate(poss):
idx_to_pos[i] = pos
pos_to_idx[pos] = i
# rows, columns, "right" diagonals and "left" diagonals
# Each table is a list of lines; each line is a list of (i, j) positions.
# poscols[j]: every position whose second coordinate is j
poscols = [[(i, j) for i in xrows] for j in xcols]
# posrows[i]: every position whose first coordinate is i
posrows = [[(i, j) for j in xcols] for i in xrows]
# posdiag[g]: on-board positions whose coordinates sum to g, for g in 0 .. 2*L - 2
posdiag = [[(h, g - h) for h in range(g + 1) if h < L and g - h < L] for g in range(L * 2 - 1)]
# posgaid: on-board positions whose (first - second) coordinate equals g, for g in -L + 1 .. L - 1
posgaid = [[(g + h, h) for h in range(L) if -1 < g + h < L] for g in range(-L + 1, L)]
def attacks(pos):
    """Return the four lines of squares that pass through ``pos``.

    The result is a 4-tuple of frozensets of positions, taken in order from
    posrows, poscols, posdiag and posgaid.
    """
    matches = []
    for family in (posrows, poscols, posdiag, posgaid):
        # keep only the lines of this family that contain the square
        matches.append([line for line in family if pos in line])
    # a square must lie on exactly one line of each family
    assert all(len(found) == 1 for found in matches)
    return tuple(frozenset(found[0]) for found in matches)
# attackmap[pos] is the 4-tuple returned by attacks(pos), precomputed for every square
attackmap = {(i, j): attacks((i, j)) for i in range(L) for j in range(L)}
# the same line families as above, each stored as a set of frozensets of positions
setcols = set(map(frozenset, poscols))
setrows = set(map(frozenset, posrows))
setdiag = set(map(frozenset, posdiag))
setgaid = set(map(frozenset, posgaid))
# choice between bitmaps and sets
#
# bitmaps are represented natively as (long) ints in Python,
# thus bitmap operations are very very fast
#
# however for asymptotic complexity, x in bitmap operation is O(N) and x in set is O(logN)
#
# in my experience python function calls are expensive, thus the threshold where sets show benefit is rather high
# another possible explanation for high threshold is large memory size of Python dictionaries and thus frozensets,
# __sizeof__ for representations of range(100):: set: 8K, frozenset: 4K, (2 ** 100): 40 bytes
#
# for 8x8 board, a 64-bit bitmap wins by a large margin
# IMO 10x10 board is still faster with bitmaps
# all queens are equivalent, thus solution (Q1, Q2, Q3) == (Q1, Q3, Q2)
# let's order queens, so that Q1 always precedes Q2 on the board
# then, let's do an exhaustive search with early pruning:
# consider board of 4 [ , , , ] for 3 queens
# position [ , ,Q1, ] will never generate a solution, because there's no space for both Q2 and Q3 left
# likewise, let's extend concept of "space" along 4 dimensions -- rows, cols, diag, gaid
solutions = []
def place(board, queens, r, c, d, g):
    """Recursively place the remaining queens and record every complete placement.

    board  -- remaining unattacked places on the board, in board order
    queens -- remaining queens to place
    r, c, d, g -- remaining rows, cols, diag, gaid free

    Appends one entry to the module-level ``solutions`` list per complete
    placement; returns None.
    """
    # if we have run out of queens, it's a valid solution
    if not queens:
        # print "solution found"
        solutions.append(None)
        # Nothing is left to place. Stop here so that squares which are still
        # unattacked are not explored with an empty queen list, which would
        # record the same placement again.
        return
    # early pruning, make sure this many queens can actually be placed
    needed = len(queens)
    if needed > len(board): return
    if needed > len(r): return
    if needed > len(c): return
    if needed > len(d): return
    if needed > len(g): return
    # queens[0] is the queen to be placed on some pos
    for ip, pos in enumerate(board):
        ar, ac, ad, ag = attackmap[pos]
        attacked = frozenset.union(ar, ac, ad, ag)
        # only squares after pos are considered, so queens stay in board order
        nboard = [b for b in board[ip + 1:] if b not in attacked]
        # NOTE(review): r/c/d/g are sets of whole lines (frozensets) while
        # ar/ac/ad/ag are frozensets of positions, so these differences look
        # like they never remove anything -- confirm whether `r - {ar}` etc.
        # was intended. Left unchanged to keep the benchmark workload as is.
        place(nboard, queens[1:], r - ar, c - ac, d - ad, g - ag)
def run():
    """Run the search from an empty board and return how many entries it recorded."""
    # empty the shared list in place so repeated runs do not accumulate
    del solutions[:]
    queen_names = ["Q%s" % n for n in range(L)]
    queen_names.sort()
    place(poss, queen_names, setrows, setcols, setdiag, setgaid)
    return len(solutions)
print(run())
l = [set(range(5)) for i in xrange(1000)]
def f():
    """Micro-benchmark: repeatedly take the difference of two one-element sets."""
    left = set(range(1))
    right = set(range(1))
    for _ in xrange(400000):
        # ten differences per iteration; every result is discarded
        left - right
        left - right
        left - right
        left - right
        left - right
        left - right
        left - right
        left - right
        left - right
        left - right
f()
This diff is collapsed.
// This file was copied from https://llvm.org/svn/llvm-project/llvm/trunk/include/llvm/ADT/DenseSet.h?p=230300
// and came with the following license:
//===- llvm/ADT/DenseSet.h - Dense probed hash table ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the DenseSet class.
//
//===----------------------------------------------------------------------===//
// Modifications were made for Pyston, using the following license:
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYSTON_CORE_FROMLLVM_DENSESET_H
#define PYSTON_CORE_FROMLLVM_DENSESET_H
#include "core/from_llvm/DenseMap.h"
namespace pyston {
namespace detail {

/// Empty tag type used as the mapped "value" of the DenseMap behind a DenseSet.
struct DenseSetEmpty {};

/// Bucket type for DenseSet that stores only the key.
///
/// Uses the empty base class trick: the "second" half of the pair is the
/// DenseSetEmpty base sub-object, so a DenseMap built on this bucket type
/// contains only a single item per bucket.
template <typename KeyT> class DenseSetPair : public DenseSetEmpty {
public:
    // Key accessors (the "first" element of the pair).
    const KeyT &getFirst() const { return Key; }
    KeyT &getFirst() { return Key; }

    // The "second" element is this object viewed as its empty base.
    const DenseSetEmpty &getSecond() const { return static_cast<const DenseSetEmpty &>(*this); }
    DenseSetEmpty &getSecond() { return static_cast<DenseSetEmpty &>(*this); }

private:
    KeyT Key;
};

} // namespace detail
/// DenseSet - This implements a dense probed hash-table based set.
///
/// The set is a thin wrapper around a DenseMap whose buckets are
/// detail::DenseSetPair<ValueT>, i.e. they hold only the key. ValueInfoT and
/// MinSize are passed straight through to that DenseMap.
template <typename ValueT, typename ValueInfoT = DenseMapInfo<ValueT>, int MinSize = 64>
class DenseSet {
  typedef DenseMap<ValueT, detail::DenseSetEmpty, ValueInfoT,
                   detail::DenseSetPair<ValueT>, MinSize> MapTy;
  static_assert(sizeof(typename MapTy::value_type) == sizeof(ValueT),
                "DenseMap buckets unexpectedly large!");
  MapTy TheMap;

public:
  typedef ValueT key_type;
  typedef ValueT value_type;
  typedef unsigned size_type;

  /// NumInitBuckets is forwarded unchanged to the underlying DenseMap.
  explicit DenseSet(unsigned NumInitBuckets = 0) : TheMap(NumInitBuckets) {}

  bool empty() const { return TheMap.empty(); }
  size_type size() const { return TheMap.size(); }
  size_t getMemorySize() const { return TheMap.getMemorySize(); }

  /// Grow the DenseSet so that it has at least Size buckets. Will not shrink
  /// the Size of the set.
  void resize(size_t Size) { TheMap.resize(Size); }

  void clear() { TheMap.clear(); }

  /// Return 1 if the specified key is in the set, 0 otherwise.
  size_type count(const ValueT &V) const { return TheMap.count(V); }

  /// Erase V by key; the result is whatever the underlying DenseMap reports.
  bool erase(const ValueT &V) { return TheMap.erase(V); }

  void swap(DenseSet &RHS) { TheMap.swap(RHS.TheMap); }

  // Iterators.

  /// Forward iterator over the keys, wrapping the underlying map iterator.
  class Iterator {
    typename MapTy::iterator I;
    friend class DenseSet;

  public:
    typedef typename MapTy::iterator::difference_type difference_type;
    typedef ValueT value_type;
    typedef value_type *pointer;
    typedef value_type &reference;
    typedef std::forward_iterator_tag iterator_category;

    Iterator(const typename MapTy::iterator &i) : I(i) {}

    ValueT &operator*() { return I->getFirst(); }
    ValueT *operator->() { return &I->getFirst(); }
    // const overloads so a const Iterator object can still be dereferenced
    const ValueT &operator*() const { return I->getFirst(); }
    const ValueT *operator->() const { return &I->getFirst(); }

    Iterator &operator++() { ++I; return *this; }
    bool operator==(const Iterator &X) const { return I == X.I; }
    bool operator!=(const Iterator &X) const { return I != X.I; }
  };

  /// Read-only forward iterator over the keys.
  class ConstIterator {
    typename MapTy::const_iterator I;
    friend class DenseSet;

  public:
    typedef typename MapTy::const_iterator::difference_type difference_type;
    typedef ValueT value_type;
    // pointer/reference describe what operator->/operator* actually return
    typedef const value_type *pointer;
    typedef const value_type &reference;
    typedef std::forward_iterator_tag iterator_category;

    ConstIterator(const typename MapTy::const_iterator &i) : I(i) {}

    // dereferencing does not modify the iterator, so these are const members
    const ValueT &operator*() const { return I->getFirst(); }
    const ValueT *operator->() const { return &I->getFirst(); }

    ConstIterator &operator++() { ++I; return *this; }
    bool operator==(const ConstIterator &X) const { return I == X.I; }
    bool operator!=(const ConstIterator &X) const { return I != X.I; }
  };

  typedef Iterator iterator;
  typedef ConstIterator const_iterator;

  iterator begin() { return Iterator(TheMap.begin()); }
  iterator end() { return Iterator(TheMap.end()); }

  const_iterator begin() const { return ConstIterator(TheMap.begin()); }
  const_iterator end() const { return ConstIterator(TheMap.end()); }

  iterator find(const ValueT &V) { return Iterator(TheMap.find(V)); }
  /// Lookup on a const set; mirrors the non-const find().
  const_iterator find(const ValueT &V) const {
    return ConstIterator(TheMap.find(V));
  }

  /// Alternative version of find() which allows a different, and possibly less
  /// expensive, key type.
  /// The DenseMapInfo is responsible for supplying methods
  /// getHashValue(LookupKeyT) and isEqual(LookupKeyT, KeyT) for each key type
  /// used.
  template <class LookupKeyT>
  iterator find_as(const LookupKeyT &Val) {
    return Iterator(TheMap.find_as(Val));
  }
  template <class LookupKeyT>
  const_iterator find_as(const LookupKeyT &Val) const {
    return ConstIterator(TheMap.find_as(Val));
  }

  void erase(Iterator I) { return TheMap.erase(I.I); }
  void erase(ConstIterator CI) { return TheMap.erase(CI.I); }

  /// Insert V via the underlying map; returns the map's (iterator, bool)
  /// result with the iterator wrapped as a DenseSet iterator.
  std::pair<iterator, bool> insert(const ValueT &V) {
    detail::DenseSetEmpty Empty;
    return TheMap.insert(std::make_pair(V, Empty));
  }

  // Range insertion of values.
  template <typename InputIt>
  void insert(InputIt I, InputIt E) {
    for (; I != E; ++I)
      insert(*I);
  }
};
} // end namespace pyston
#endif
......@@ -280,6 +280,14 @@ public:
map[pthread_self()] = s;
ASSERT(this->map.size() == this->map_elts, "%ld %d", this->map.size(), this->map_elts);
}
#ifndef NDEBUG
{
LOCK_REGION(&lock);
ASSERT(this->map.size() == this->map_elts, "%ld %d", this->map.size(), this->map_elts);
}
#endif
return &s->val;
}
};
......
......@@ -15,8 +15,7 @@
#ifndef PYSTON_RUNTIME_SET_H
#define PYSTON_RUNTIME_SET_H
#include "llvm/ADT/DenseSet.h"
#include "core/from_llvm/DenseSet.h"
#include "core/types.h"
#include "runtime/types.h"
......@@ -29,7 +28,7 @@ extern "C" Box* createSet();
class BoxedSet : public Box {
public:
typedef llvm::DenseSet<BoxAndHash, BoxAndHash::Comparisons> Set;
typedef pyston::DenseSet<BoxAndHash, BoxAndHash::Comparisons, /* MinSize= */ 8> Set;
Set s;
Box** weakreflist; /* List of weak references */
......
......@@ -24,6 +24,7 @@
#include "codegen/irgen/future.h"
#include "core/contiguous_map.h"
#include "core/from_llvm/DenseMap.h"
#include "core/threading.h"
#include "core/types.h"
#include "gc/gc_alloc.h"
......@@ -707,7 +708,8 @@ struct BoxAndHash {
class BoxedDict : public Box {
public:
typedef llvm::DenseMap<BoxAndHash, Box*, BoxAndHash::Comparisons> DictMap;
typedef pyston::DenseMap<BoxAndHash, Box*, BoxAndHash::Comparisons, detail::DenseMapPair<BoxAndHash, Box*>,
/* MinSize= */ 8> DictMap;
DictMap d;
......
#!/usr/bin/env bash
set -eu
failed=0
for fn in $(find . -name '*.cpp' -o -name '*.h'); do
for fn in $(find . -path ./core -prune -o -name '*.cpp' -print -o -name '*.h' -print); do
diff -u $fn <($1 -style=file $fn) || failed=1
done
......
#!/usr/bin/env bash
# Reformat every *.cpp / *.h file below the current directory in place,
# skipping everything under ./core.
#
# Usage: do_format.sh <path-to-clang-format>
set -eu

# Fail with a usage hint instead of a bare "unbound variable" error.
clang_format=${1:?usage: $0 <path-to-clang-format>}

# NUL-delimited names and quoted expansions keep paths that contain
# whitespace or glob characters intact. stdin is redirected so the formatter
# can never consume the file list coming through the pipe.
find . -path ./core -prune -o \( -name '*.cpp' -o -name '*.h' \) -print0 |
    while IFS= read -r -d '' fn; do
        "$clang_format" -i --style=file "$fn" </dev/null
    done
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment