18#ifndef TLX_SORT_PARALLEL_MERGESORT_HEADER
19#define TLX_SORT_PARALLEL_MERGESORT_HEADER
40namespace parallel_mergesort_detail {
43template <
typename DiffType>
56template <
typename RandomAccessIterator>
59 typename std::iterator_traits<RandomAccessIterator>::value_type;
61 typename std::iterator_traits<RandomAccessIterator>::difference_type;
92template <
typename RandomAccessIterator,
typename DiffType>
94 DiffType& num_samples,
103 static_cast<size_t>(num_samples + 1), es.
begin());
105 for (DiffType i = 0; i < num_samples; i++) {
119template <
bool Stable,
typename RandomAccessIterator,
typename Comparator>
127 typename std::iterator_traits<RandomAccessIterator>::value_type;
129 typename std::iterator_traits<RandomAccessIterator>::difference_type;
132 DiffType length_local = sd->
starts[iam + 1] - sd->
starts[iam];
134 using SortingPlacesIterator = ValueType *;
137 sd->
temporary[iam] =
static_cast<ValueType*
>(
138 ::operator
new (
sizeof(ValueType) * (length_local + 1)));
148 sd->
temporary[iam] + length_local, comp);
151 sd->
temporary[iam] + length_local, comp);
158 DiffType num_samples;
166 for (
size_t s = 0; s < num_threads; s++)
169 if (num_samples * iam > 0)
170 sd->
pieces[iam][s].begin =
173 sd->
samples[num_samples * iam],
178 sd->
pieces[iam][s].begin = 0;
180 if ((num_samples * (iam + 1)) < (num_samples * num_threads))
184 sd->
samples[num_samples * (iam + 1)],
197 SortingPlacesIterator> > seqs(num_threads);
199 for (
size_t s = 0; s < num_threads; s++)
200 seqs[s] = std::make_pair(
207 if (iam < num_threads - 1)
211 for (
size_t seq = 0; seq < num_threads; seq++)
214 if (iam < (num_threads - 1))
215 sd->
pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
223 for (
size_t seq = 0; seq < num_threads; seq++)
227 sd->
pieces[iam][seq].begin = sd->
pieces[iam - 1][seq].end;
230 sd->
pieces[iam][seq].begin = 0;
235 DiffType offset = 0, length_am = 0;
236 for (
size_t s = 0; s < num_threads; s++)
238 length_am += sd->
pieces[iam][s].end - sd->
pieces[iam][s].begin;
239 offset += sd->
pieces[iam][s].begin;
245 SortingPlacesIterator> > seqs(num_threads);
247 for (
size_t s = 0; s < num_threads; s++)
249 seqs[s] = std::make_pair(
255 seqs.begin(), seqs.end(),
256 sd->
source + offset, length_am, comp);
278template <
bool Stable,
279 typename RandomAccessIterator,
typename Comparator>
281 RandomAccessIterator begin,
282 RandomAccessIterator end,
284 size_t num_threads = std::thread::hardware_concurrency(),
287 using namespace parallel_mergesort_detail;
290 typename std::iterator_traits<RandomAccessIterator>::difference_type;
292 DiffType n = end - begin;
298 if (num_threads >
static_cast<size_t>(n))
299 num_threads =
static_cast<size_t>(n);
301 PMWMSSortingData<RandomAccessIterator> sd(num_threads);
309 for (
size_t s = 0; s < num_threads; s++)
310 sd.pieces[s].resize(num_threads);
312 DiffType* starts = sd.starts.data();
314 DiffType chunk_length = n / num_threads,
split = n % num_threads, start = 0;
315 for (
size_t i = 0; i < num_threads; i++)
318 start += (i < static_cast<size_t>(
split))
319 ? (chunk_length + 1) : chunk_length;
321 starts[num_threads] = start;
328#pragma omp parallel num_threads(num_threads)
330 size_t iam = omp_get_thread_num();
331 parallel_sort_mwms_pu<Stable>(
332 &sd, iam, num_threads, barrier, comp, mwmsa);
336 for (
size_t iam = 0; iam < num_threads; ++iam) {
337 threads[iam] = std::thread(
339 parallel_sort_mwms_pu<Stable>(
340 &sd, iam, num_threads, barrier, comp, mwmsa);
343 for (
size_t i = 0; i < num_threads; i++) {
358template <
typename RandomAccessIterator,
359 typename Comparator = std::less<
360 typename std::iterator_traits<RandomAccessIterator>::value_type> >
362 RandomAccessIterator begin,
363 RandomAccessIterator end,
364 Comparator comp = Comparator(),
365 size_t num_threads = std::thread::hardware_concurrency(),
369 begin, end, comp, num_threads, mwmsa);
381template <
typename RandomAccessIterator,
382 typename Comparator = std::less<
383 typename std::iterator_traits<RandomAccessIterator>::value_type> >
385 RandomAccessIterator begin,
386 RandomAccessIterator end,
387 Comparator comp = Comparator(),
388 size_t num_threads = std::thread::hardware_concurrency(),
392 begin, end, comp, num_threads, mwmsa);
Simpler non-growing vector without initialization.
iterator begin() noexcept
return mutable iterator to first element
Implements a thread barrier using mutex locking and condition variables that can be used to synchroni...
void wait(Lambda lambda=Lambda())
Waits for n threads to arrive.
size_t parallel_multiway_merge_oversampling
default oversampling factor for parallel_multiway_merge
void multisequence_partition(const RanSeqs &begin_seqs, const RanSeqs &end_seqs, const RankType &rank, RankIterator begin_offsets, Comparator comp=Comparator())
Splits several sorted sequences at a certain global rank, resulting in a splitting point for each seq...
RandomAccessIterator3 multiway_merge_base(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type size, Comparator comp=Comparator(), MultiwayMergeAlgorithm mwma=MWMA_ALGORITHM_DEFAULT)
Sequential multi-way merging switch.
MultiwayMergeSplittingAlgorithm
Different splitting strategies for sorting/merging: by sampling, exact.
void parallel_mergesort_base(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, size_t num_threads=std::thread::hardware_concurrency(), MultiwayMergeSplittingAlgorithm mwmsa=MWMSA_DEFAULT)
Parallel multiway mergesort main call.
void parallel_mergesort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp=Comparator(), size_t num_threads=std::thread::hardware_concurrency(), MultiwayMergeSplittingAlgorithm mwmsa=MWMSA_DEFAULT)
Parallel multiway mergesort.
void stable_parallel_mergesort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp=Comparator(), size_t num_threads=std::thread::hardware_concurrency(), MultiwayMergeSplittingAlgorithm mwmsa=MWMSA_DEFAULT)
Stable parallel multiway mergesort.
std::vector< std::string > split(char sep, const std::string &str, std::string::size_type limit)
Split the given string at each separator character into distinct substrings.
DiffTypeOutputIterator equally_split(DiffType n, size_t p, DiffTypeOutputIterator s)
Split a sequence into parts of almost equal size.
void determine_samples(PMWMSSortingData< RandomAccessIterator > *sd, DiffType &num_samples, size_t iam, size_t num_threads)
Select samples from a sequence.
void parallel_sort_mwms_pu(PMWMSSortingData< RandomAccessIterator > *sd, size_t iam, size_t num_threads, ThreadBarrierMutex &barrier, Comparator &comp, MultiwayMergeSplittingAlgorithm mwmsa)
PMWMS code executed by each thread.
static void sort(Iterator begin, Iterator end, Comparator cmp=Comparator())
Call best known sorting network for up to sixteen elements with given comparison method.
DiffType end
End of subsequence.
DiffType begin
Begin of subsequence.
Data accessed by all threads.
typename std::iterator_traits< RandomAccessIterator >::difference_type DiffType
RandomAccessIterator source
Input begin.
simple_vector< DiffType > starts
Start indices, per thread.
simple_vector< ValueType * > temporary
Storage in which to sort.
PMWMSSortingData(size_t num_threads)
simple_vector< ValueType > samples
Samples.
simple_vector< DiffType > offsets
Offsets to add to the found positions.
typename std::iterator_traits< RandomAccessIterator >::value_type ValueType
simple_vector< simple_vector< PMWMSPiece< DiffType > > > pieces
PMWMSPieces of data to merge [thread][sequence].