// Copyright (C) 2006 Fokko Beekhof // Email contact: Fokko.Beekhof@cui.unige.ch // The OMPTL library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #ifndef OMPTL_TOOLS_H #define OMPTL_TOOLS_H 1 #include #include #include #include #include namespace omptl { // Log of the number of operations that is expected to run faster in a single // thread. const unsigned C = 12; template T log2N_(T n) { if (n == 0) return 0; const unsigned b[] = #if (WORD_BIT == 32) {0x2u, 0xCu, 0xF0u, 0xFF00u, 0xFFFF0000u}; #else // 64-bit {0x2u, 0xCu, 0xF0u, 0xFF00u, 0xFFFF0000u, 0xFFFFFFFF00000000u}; assert(WORD_BIT == 64); #endif const T S[] = {1u, 2u, 4u, 8u, 16u, 32u, 64u, 128u}; T result = 0u; // result of log2(v) will go here for (int i = static_cast(sizeof(T)); i >= 0; --i) { if (n & b[i]) { n >>= S[i]; result |= S[i]; } } return result; } template bool _linear_serial_is_faster(Iterator first, Iterator last, const unsigned P) { assert(P > 0u); assert(::std::distance(first, last) >= 0); const unsigned N = ::std::distance(first, last); return (N < 2u*P) || (log2N_(N) < C); } template bool _logn_serial_is_faster(Iterator first, Iterator last, const unsigned P) { assert(P > 0u); assert(::std::distance(first, last) >= 0); const unsigned N = ::std::distance(first, last); return (N < 2u*P) || (log2N_(N) < (1 << C)); } template bool _nlogn_serial_is_faster(Iterator first, Iterator last, const unsigned P) { assert(P > 0u); assert(::std::distance(first, last) >= 0); const unsigned N = ::std::distance(first, last); return (N < 2u*P) || (N*log2N_(N) < (1 << C)); } template void _copy_partitions(const ::std::vector< ::std::pair > &source_partitions, Iterator2 first, ::std::vector &dest_partitions, const unsigned P) { assert(source_partitions.size() == P); assert(dest_partitions.size() == P); for (unsigned i = 0; i < P; ++i) { dest_partitions[i] = first; // The last "advance" is very important, it may create space // if it is an InsertIterator or something like that. ::std::advance(first, ::std::distance( source_partitions[i].first, source_partitions[i].second) ); } } // Divide a given range into P partitions template void _partition_range(Iterator first, Iterator last, ::std::vector< ::std::pair > &partitions, const unsigned P) { assert(partitions.size() == P); typedef ::std::pair Partition; const unsigned N = ::std::distance(first, last); const unsigned range = N / P + ((N%P)? 1 : 0); assert(2u*P <= N); assert(range <= N); // All but last partition have same range Iterator currentLast = first; ::std::advance(currentLast, range); for (unsigned i = 0; i < P - 1; ++i) { partitions[i] = Partition(first, currentLast); first = currentLast; ::std::advance(currentLast, range); } // Last range may be shorter assert(::std::distance(first, last) <= range); partitions[P - 1] = Partition(first, last); } // Given a range, re-arrange the items such that all elements smaller than // the pivot precede all other values. Returns an Iterator to the first // element not smaller than the pivot. template Iterator _stable_pivot_range(Iterator first, Iterator last, const typename ::std::iterator_traits::value_type pivot, StrictWeakOrdering comp = std::less< typename ::std::iterator_traits::value_type>()) { Iterator pivotIt = last; while (first < last) { if (comp(*first, pivot)) ++first; else { Iterator high = first; while ( (++high < last) && !comp(*high, pivot) ) /* nop */; if (high < last) ::std::iter_swap(first, last); first = pivotIt = ++high; } } return pivotIt; } template Iterator _pivot_range(Iterator first, Iterator last, const typename ::std::iterator_traits::value_type pivot, StrictWeakOrdering comp) { while (first < last) { if (comp(*first, pivot)) ++first; else { while ( (first < --last) && !comp(*last, pivot) ) /* nop */; ::std::iter_swap(first, last); } } return last; } template void _partition_range_by_pivots(Iterator first, Iterator last, const ::std::vector::value_type> &pivots, ::std::vector< ::std::pair > &partitions, StrictWeakOrdering comp, const unsigned P) { assert(partitions.size() == P); typedef ::std::pair Partition; ::std::vector ptable(P); ::std::vector::value_type> pvts(pivots.size()); ::std::vector borders; ::std::vector used(pivots.size()); ::std::fill(used.begin(), used.end(), false); borders.push_back(first); borders.push_back(last); partitions[0].first = first; partitions[0].second = last; for (unsigned p = 1; (1 << p) <= (int)P; ++p) { const int PROC = (1 << p); const int PIVOTS = (1 << (p-1)); assert(PIVOTS <= (int)pivots.size()); #pragma omp parallel for //default(none) shared(used, pvts) for (int t = 0; t < PIVOTS; ++t) { const int index = int(P / PROC) + 2 * t * int(P / PROC) - 1; assert(index < (int)pivots.size()); assert(!used[index]); used[index] = true; pvts[t] = pivots[index]; /*::std::cout << "pvts T: " << t << " --> " << index << " " << pvts[t] << ::std::endl;*/ } #pragma omp parallel for //default(none) private(comp) // shared(ptable, pvts, partitions) for (int t = 0; t < PIVOTS; ++t) ptable[t] = _pivot_range(partitions[t].first, partitions[t].second, pvts[t], comp); for (int i = 0; i < PIVOTS; ++i) { // ::std::cout << "ADD: " << ::std::distance(first, ptable[t]) << ::std::endl; borders.push_back(ptable[i]); } ::std::sort(borders.begin(), borders.end()); for (unsigned i = 0; i < borders.size() - 1; ++i) { partitions[i].first = borders[i]; partitions[i].second = borders[i + 1]; } /*::std::cout << "PASS: " << p << ::std::endl; for (t = 0; t < (1 << p); ++t) ::std::cout << t << ": " << ::std::distance(first, partitions[t].first) << " - " << ::std::distance(first, partitions[t].second) << ::std::endl;*/ } for (unsigned i = 0; i < pivots.size(); ++i) if(!used[i]) pvts[i] = pivots[i]; #pragma omp parallel for // default(none) private(t, comp) // shared(used, ptable, pvts, partitions) for (int t = 0; t < int(pivots.size()); ++t) if (!used[t]) ptable[t] = _pivot_range(partitions[t].first, partitions[t].second, pvts[t], comp); for (unsigned i = 0; i < pivots.size(); ++i) { if (!used[i]) { // ::std::cout << "LAST ADD: " << ::std::distance(first, ptable[i]) // << ::std::endl; borders.push_back(ptable[i]); } } ::std::sort(borders.begin(), borders.end()); assert(borders.size() - 1 == P); for (unsigned i = 0; i < P; ++i) { partitions[i].first = borders[i]; partitions[i].second = borders[i + 1]; } // ::std::cout << "LAST: " << p << ::std::endl; // for (t = 0; t < P; ++t) // ::std::cout << t << ": " << ::std::distance(first, partitions[t].first) // << " - " << ::std::distance(first, partitions[t].second) // << ::std::endl; } template void _partition_range_stable_by_pivots(Iterator first, Iterator last, const ::std::vector::value_type> &pivots, ::std::vector< ::std::pair > &partitions, std::less::value_type> comp, const unsigned P) { typedef ::std::pair Partition; Iterator start = first; for (unsigned i = 0; i < P - 1; ++i) { Iterator low = start; while (low < last) { // Find a value not lower than the pivot. while( (*low < pivots[i]) && (low < last) ) ::std::advance(low, 1); // Entire range scanned ? if (low == last) break; // Find a value lower than the pivot, starting from // low, working our way up. Iterator high = low; ::std::advance(high, 1); while( !(*high < pivots[i]) && (high < last) ) ::std::advance(high, 1); // Entire range scanned ? if (high == last) break; // Swap values assert( !(*low void _find_pivots(RandomAccessIterator first, RandomAccessIterator last, ::std::vector::value_type> &pivots, const unsigned P, unsigned SAMPLE_RATIO = 10) { assert(SAMPLE_RATIO > 0); const unsigned N = ::std::distance(first, last); assert(N >= 2u*P); // Adjust the constant. Erm. while (SAMPLE_RATIO * (P + 1) > N) SAMPLE_RATIO /= 2; pivots.clear(); pivots.reserve(P - 1); typedef typename ::std::iterator_traits::value_type value_type; ::std::vector samples; const unsigned NSAMPLES = SAMPLE_RATIO * P + SAMPLE_RATIO; samples.reserve(NSAMPLES); for (unsigned i = 0; i < NSAMPLES; ++i) { const unsigned offset = i * (N-1) / (NSAMPLES - 1); assert(offset < N); samples.push_back(*(first + offset)); // std::cout << "offset: " << offset << " sample: " << samples[i] << std::endl; } assert(samples.size() == NSAMPLES); // Sort samples to create relative ordering in data ::std::sort(samples.begin(), samples.end()); // Take pivots from sampled data for (unsigned i = 1; i < P; ++i) { pivots.push_back(samples[i * samples.size() / P]); /*std::cout << "pivot: " << i << " idx: " << (i * samples.size() / P) << " " << pivots[i-1] << std::endl;*/ } assert(pivots.size() == P - 1); } } // namespace omptl #endif /* OMPTL_TOOLS_H */