400 lines
11 KiB
C
400 lines
11 KiB
C
|
// Copyright (C) 2006 Fokko Beekhof
|
||
|
// Email contact: Fokko.Beekhof@cui.unige.ch
|
||
|
|
||
|
// The OMPTL library is free software; you can redistribute it and/or
|
||
|
// modify it under the terms of the GNU Lesser General Public
|
||
|
// License as published by the Free Software Foundation; either
|
||
|
// version 2.1 of the License, or (at your option) any later version.
|
||
|
|
||
|
// This library is distributed in the hope that it will be useful,
|
||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
// Lesser General Public License for more details.
|
||
|
|
||
|
// You should have received a copy of the GNU Lesser General Public
|
||
|
// License along with this library; if not, write to the Free Software
|
||
|
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||
|
|
||
|
#ifndef OMPTL_TOOLS_H
|
||
|
#define OMPTL_TOOLS_H 1
|
||
|
|
||
|
#include <utility>
|
||
|
#include <vector>
|
||
|
#include <cassert>
|
||
|
#include <algorithm>
|
||
|
#include <climits>
|
||
|
|
||
|
namespace omptl
|
||
|
{
|
||
|
|
||
|
// Log of the number of operations that is expected to run faster in a single
|
||
|
// thread.
|
||
|
const unsigned C = 12;
|
||
|
|
||
|
template <typename T>
|
||
|
T log2N_(T n)
|
||
|
{
|
||
|
if (n == 0)
|
||
|
return 0;
|
||
|
|
||
|
const unsigned b[] =
|
||
|
#if (WORD_BIT == 32)
|
||
|
{0x2u, 0xCu, 0xF0u, 0xFF00u, 0xFFFF0000u};
|
||
|
#else // 64-bit
|
||
|
{0x2u, 0xCu, 0xF0u, 0xFF00u, 0xFFFF0000u, 0xFFFFFFFF00000000u};
|
||
|
assert(WORD_BIT == 64);
|
||
|
#endif
|
||
|
const T S[] = {1u, 2u, 4u, 8u, 16u, 32u, 64u, 128u};
|
||
|
|
||
|
T result = 0u; // result of log2(v) will go here
|
||
|
for (int i = static_cast<int>(sizeof(T)); i >= 0; --i)
|
||
|
{
|
||
|
if (n & b[i])
|
||
|
{
|
||
|
n >>= S[i];
|
||
|
result |= S[i];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
template<typename Iterator>
|
||
|
bool _linear_serial_is_faster(Iterator first, Iterator last,
|
||
|
const unsigned P)
|
||
|
{
|
||
|
assert(P > 0u);
|
||
|
assert(::std::distance(first, last) >= 0);
|
||
|
const unsigned N = ::std::distance(first, last);
|
||
|
|
||
|
return (N < 2u*P) || (log2N_(N) < C);
|
||
|
}
|
||
|
|
||
|
template<typename Iterator>
|
||
|
bool _logn_serial_is_faster(Iterator first, Iterator last,
|
||
|
const unsigned P)
|
||
|
{
|
||
|
assert(P > 0u);
|
||
|
assert(::std::distance(first, last) >= 0);
|
||
|
const unsigned N = ::std::distance(first, last);
|
||
|
|
||
|
return (N < 2u*P) || (log2N_(N) < (1 << C));
|
||
|
}
|
||
|
|
||
|
template<typename Iterator>
|
||
|
bool _nlogn_serial_is_faster(Iterator first, Iterator last,
|
||
|
const unsigned P)
|
||
|
{
|
||
|
assert(P > 0u);
|
||
|
assert(::std::distance(first, last) >= 0);
|
||
|
const unsigned N = ::std::distance(first, last);
|
||
|
|
||
|
return (N < 2u*P) || (N*log2N_(N) < (1 << C));
|
||
|
}
|
||
|
|
||
|
template<typename Iterator1, typename Iterator2>
|
||
|
void _copy_partitions(const ::std::vector< ::std::pair<Iterator1, Iterator1> >
|
||
|
&source_partitions, Iterator2 first,
|
||
|
::std::vector<Iterator2> &dest_partitions, const unsigned P)
|
||
|
{
|
||
|
assert(source_partitions.size() == P);
|
||
|
assert(dest_partitions.size() == P);
|
||
|
for (unsigned i = 0; i < P; ++i)
|
||
|
{
|
||
|
dest_partitions[i] = first;
|
||
|
|
||
|
// The last "advance" is very important, it may create space
|
||
|
// if it is an InsertIterator or something like that.
|
||
|
::std::advance(first, ::std::distance(
|
||
|
source_partitions[i].first,
|
||
|
source_partitions[i].second) );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Divide a given range into P partitions
|
||
|
template<typename Iterator>
|
||
|
void _partition_range(Iterator first, Iterator last,
|
||
|
::std::vector< ::std::pair<Iterator, Iterator> > &partitions,
|
||
|
const unsigned P)
|
||
|
{
|
||
|
assert(partitions.size() == P);
|
||
|
|
||
|
typedef ::std::pair<Iterator, Iterator> Partition;
|
||
|
|
||
|
const unsigned N = ::std::distance(first, last);
|
||
|
const unsigned range = N / P + ((N%P)? 1 : 0);
|
||
|
assert(2u*P <= N);
|
||
|
assert(range <= N);
|
||
|
|
||
|
// All but last partition have same range
|
||
|
Iterator currentLast = first;
|
||
|
::std::advance(currentLast, range);
|
||
|
for (unsigned i = 0; i < P - 1; ++i)
|
||
|
{
|
||
|
partitions[i] = Partition(first, currentLast);
|
||
|
first = currentLast;
|
||
|
::std::advance(currentLast, range);
|
||
|
}
|
||
|
|
||
|
// Last range may be shorter
|
||
|
assert(::std::distance(first, last) <= range);
|
||
|
partitions[P - 1] = Partition(first, last);
|
||
|
}
|
||
|
|
||
|
// Given a range, re-arrange the items such that all elements smaller than
|
||
|
// the pivot precede all other values. Returns an Iterator to the first
|
||
|
// element not smaller than the pivot.
|
||
|
template<typename Iterator, class StrictWeakOrdering>
|
||
|
Iterator _stable_pivot_range(Iterator first, Iterator last,
|
||
|
const typename ::std::iterator_traits<Iterator>::value_type pivot,
|
||
|
StrictWeakOrdering comp = std::less<
|
||
|
typename ::std::iterator_traits<Iterator>::value_type>())
|
||
|
{
|
||
|
Iterator pivotIt = last;
|
||
|
while (first < last)
|
||
|
{
|
||
|
if (comp(*first, pivot))
|
||
|
++first;
|
||
|
else
|
||
|
{
|
||
|
Iterator high = first;
|
||
|
while ( (++high < last) && !comp(*high, pivot) )
|
||
|
/* nop */;
|
||
|
if (high < last)
|
||
|
::std::iter_swap(first, last);
|
||
|
first = pivotIt = ++high;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return pivotIt;
|
||
|
}
|
||
|
|
||
|
template<typename Iterator, class StrictWeakOrdering>
|
||
|
Iterator _pivot_range(Iterator first, Iterator last,
|
||
|
const typename ::std::iterator_traits<Iterator>::value_type pivot,
|
||
|
StrictWeakOrdering comp)
|
||
|
{
|
||
|
while (first < last)
|
||
|
{
|
||
|
if (comp(*first, pivot))
|
||
|
++first;
|
||
|
else
|
||
|
{
|
||
|
while ( (first < --last) && !comp(*last, pivot) )
|
||
|
/* nop */;
|
||
|
::std::iter_swap(first, last);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return last;
|
||
|
}
|
||
|
|
||
|
template<typename Iterator, class StrictWeakOrdering>
|
||
|
void _partition_range_by_pivots(Iterator first, Iterator last,
|
||
|
const ::std::vector<typename
|
||
|
::std::iterator_traits<Iterator>::value_type> &pivots,
|
||
|
::std::vector< ::std::pair<Iterator, Iterator> > &partitions,
|
||
|
StrictWeakOrdering comp, const unsigned P)
|
||
|
{
|
||
|
assert(partitions.size() == P);
|
||
|
typedef ::std::pair<Iterator, Iterator> Partition;
|
||
|
::std::vector<Iterator> ptable(P);
|
||
|
::std::vector<typename ::std::iterator_traits<Iterator>::value_type>
|
||
|
pvts(pivots.size());
|
||
|
|
||
|
::std::vector<Iterator> borders;
|
||
|
|
||
|
::std::vector<bool> used(pivots.size());
|
||
|
::std::fill(used.begin(), used.end(), false);
|
||
|
|
||
|
borders.push_back(first);
|
||
|
borders.push_back(last);
|
||
|
partitions[0].first = first;
|
||
|
partitions[0].second = last;
|
||
|
for (unsigned p = 1; (1 << p) <= (int)P; ++p)
|
||
|
{
|
||
|
const int PROC = (1 << p);
|
||
|
const int PIVOTS = (1 << (p-1));
|
||
|
assert(PIVOTS <= (int)pivots.size());
|
||
|
|
||
|
#pragma omp parallel for //default(none) shared(used, pvts)
|
||
|
for (int t = 0; t < PIVOTS; ++t)
|
||
|
{
|
||
|
const int index = int(P / PROC) +
|
||
|
2 * t * int(P / PROC) - 1;
|
||
|
assert(index < (int)pivots.size());
|
||
|
assert(!used[index]);
|
||
|
used[index] = true;
|
||
|
pvts[t] = pivots[index];
|
||
|
/*::std::cout << "pvts T: " << t << " --> " << index <<
|
||
|
" " << pvts[t] << ::std::endl;*/
|
||
|
}
|
||
|
|
||
|
#pragma omp parallel for //default(none) private(comp)
|
||
|
// shared(ptable, pvts, partitions)
|
||
|
for (int t = 0; t < PIVOTS; ++t)
|
||
|
ptable[t] = _pivot_range(partitions[t].first,
|
||
|
partitions[t].second,
|
||
|
pvts[t], comp);
|
||
|
|
||
|
for (int i = 0; i < PIVOTS; ++i)
|
||
|
{
|
||
|
// ::std::cout << "ADD: " << ::std::distance(first, ptable[t]) << ::std::endl;
|
||
|
borders.push_back(ptable[i]);
|
||
|
}
|
||
|
|
||
|
::std::sort(borders.begin(), borders.end());
|
||
|
|
||
|
for (unsigned i = 0; i < borders.size() - 1; ++i)
|
||
|
{
|
||
|
partitions[i].first = borders[i];
|
||
|
partitions[i].second = borders[i + 1];
|
||
|
}
|
||
|
|
||
|
/*::std::cout << "PASS: " << p << ::std::endl;
|
||
|
for (t = 0; t < (1 << p); ++t)
|
||
|
::std::cout << t << ": " << ::std::distance(first, partitions[t].first)
|
||
|
<< " - " << ::std::distance(first, partitions[t].second)
|
||
|
<< ::std::endl;*/
|
||
|
}
|
||
|
|
||
|
for (unsigned i = 0; i < pivots.size(); ++i)
|
||
|
if(!used[i])
|
||
|
pvts[i] = pivots[i];
|
||
|
|
||
|
#pragma omp parallel for // default(none) private(t, comp)
|
||
|
// shared(used, ptable, pvts, partitions)
|
||
|
for (int t = 0; t < int(pivots.size()); ++t)
|
||
|
if (!used[t])
|
||
|
ptable[t] = _pivot_range(partitions[t].first,
|
||
|
partitions[t].second,
|
||
|
pvts[t], comp);
|
||
|
|
||
|
|
||
|
for (unsigned i = 0; i < pivots.size(); ++i)
|
||
|
{
|
||
|
if (!used[i])
|
||
|
{
|
||
|
// ::std::cout << "LAST ADD: " << ::std::distance(first, ptable[i])
|
||
|
// << ::std::endl;
|
||
|
borders.push_back(ptable[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
::std::sort(borders.begin(), borders.end());
|
||
|
|
||
|
assert(borders.size() - 1 == P);
|
||
|
for (unsigned i = 0; i < P; ++i)
|
||
|
{
|
||
|
partitions[i].first = borders[i];
|
||
|
partitions[i].second = borders[i + 1];
|
||
|
}
|
||
|
|
||
|
// ::std::cout << "LAST: " << p << ::std::endl;
|
||
|
// for (t = 0; t < P; ++t)
|
||
|
// ::std::cout << t << ": " << ::std::distance(first, partitions[t].first)
|
||
|
// << " - " << ::std::distance(first, partitions[t].second)
|
||
|
// << ::std::endl;
|
||
|
|
||
|
}
|
||
|
|
||
|
template<typename Iterator>
|
||
|
void _partition_range_stable_by_pivots(Iterator first, Iterator last,
|
||
|
const ::std::vector<typename
|
||
|
::std::iterator_traits<Iterator>::value_type> &pivots,
|
||
|
::std::vector< ::std::pair<Iterator, Iterator> > &partitions,
|
||
|
std::less<typename ::std::iterator_traits<Iterator>::value_type> comp,
|
||
|
const unsigned P)
|
||
|
{
|
||
|
typedef ::std::pair<Iterator, Iterator> Partition;
|
||
|
|
||
|
Iterator start = first;
|
||
|
for (unsigned i = 0; i < P - 1; ++i)
|
||
|
{
|
||
|
Iterator low = start;
|
||
|
|
||
|
while (low < last)
|
||
|
{
|
||
|
// Find a value not lower than the pivot.
|
||
|
while( (*low < pivots[i]) && (low < last) )
|
||
|
::std::advance(low, 1);
|
||
|
|
||
|
// Entire range scanned ?
|
||
|
if (low == last) break;
|
||
|
|
||
|
// Find a value lower than the pivot, starting from
|
||
|
// low, working our way up.
|
||
|
Iterator high = low;
|
||
|
::std::advance(high, 1);
|
||
|
while( !(*high < pivots[i]) && (high < last) )
|
||
|
::std::advance(high, 1);
|
||
|
|
||
|
// Entire range scanned ?
|
||
|
if (high == last) break;
|
||
|
|
||
|
// Swap values
|
||
|
assert( !(*low<pivots[i]) && (*high<pivots[i]) );
|
||
|
::std::iter_swap(low, high);
|
||
|
}
|
||
|
|
||
|
partitions[i] = Partition(start, low);
|
||
|
start = low;
|
||
|
}
|
||
|
partitions[P - 1] = Partition(start, last);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* The sample ratio is used to sample more data. This way, the pivots can be
|
||
|
* chosen more wisely, which is our only guarantee we can generate partitions
|
||
|
* of equal size.
|
||
|
*/
|
||
|
template<typename RandomAccessIterator>
|
||
|
void _find_pivots(RandomAccessIterator first, RandomAccessIterator last,
|
||
|
::std::vector<typename
|
||
|
::std::iterator_traits<RandomAccessIterator>::value_type> &pivots,
|
||
|
const unsigned P, unsigned SAMPLE_RATIO = 10)
|
||
|
{
|
||
|
assert(SAMPLE_RATIO > 0);
|
||
|
const unsigned N = ::std::distance(first, last);
|
||
|
assert(N >= 2u*P);
|
||
|
|
||
|
// Adjust the constant. Erm.
|
||
|
while (SAMPLE_RATIO * (P + 1) > N)
|
||
|
SAMPLE_RATIO /= 2;
|
||
|
|
||
|
pivots.clear();
|
||
|
pivots.reserve(P - 1);
|
||
|
|
||
|
typedef typename
|
||
|
::std::iterator_traits<RandomAccessIterator>::value_type value_type;
|
||
|
|
||
|
::std::vector<value_type> samples;
|
||
|
const unsigned NSAMPLES = SAMPLE_RATIO * P + SAMPLE_RATIO;
|
||
|
samples.reserve(NSAMPLES);
|
||
|
|
||
|
for (unsigned i = 0; i < NSAMPLES; ++i)
|
||
|
{
|
||
|
const unsigned offset = i * (N-1) / (NSAMPLES - 1);
|
||
|
assert(offset < N);
|
||
|
samples.push_back(*(first + offset));
|
||
|
// std::cout << "offset: " << offset << " sample: " << samples[i] << std::endl;
|
||
|
}
|
||
|
assert(samples.size() == NSAMPLES);
|
||
|
|
||
|
// Sort samples to create relative ordering in data
|
||
|
::std::sort(samples.begin(), samples.end());
|
||
|
|
||
|
// Take pivots from sampled data
|
||
|
for (unsigned i = 1; i < P; ++i)
|
||
|
{
|
||
|
pivots.push_back(samples[i * samples.size() / P]);
|
||
|
/*std::cout << "pivot: " << i << " idx: " << (i * samples.size() / P)
|
||
|
<< " " << pivots[i-1] << std::endl;*/
|
||
|
}
|
||
|
assert(pivots.size() == P - 1);
|
||
|
}
|
||
|
|
||
|
} // namespace omptl
|
||
|
|
||
|
#endif /* OMPTL_TOOLS_H */
|