tlx
insertion_sort.hpp
Go to the documentation of this file.
1/*******************************************************************************
2 * tlx/sort/strings/insertion_sort.hpp
3 *
4 * Base insertion string sort. This is an internal implementation header, see
5 * tlx/sort/strings.hpp for public front-end functions.
6 *
7 * Part of tlx - http://panthema.net/tlx
8 *
9 * Copyright (C) 2015-2019 Timo Bingmann <tb@panthema.net>
10 *
11 * All rights reserved. Published under the Boost Software License, Version 1.0
12 ******************************************************************************/
13
14#ifndef TLX_SORT_STRINGS_INSERTION_SORT_HEADER
15#define TLX_SORT_STRINGS_INSERTION_SORT_HEADER
16
17#include <tlx/define/likely.hpp>
20
21namespace tlx {
22
23//! \addtogroup tlx_sort
24//! \{
25
26namespace sort_strings_detail {
27
28/******************************************************************************/
29
30//! Generic insertion sort for abstract string sets. This method only requires
31//! O(1) additional memory for sorting n strings, but runs in time O(nD).
32template <typename StringPtr>
33static inline
35insertion_sort(const StringPtr& strptr, size_t depth, size_t /* memory */) {
36 typedef typename StringPtr::StringSet StringSet;
37 typedef typename StringSet::Iterator Iterator;
38 typedef typename StringSet::String String;
39 typedef typename StringSet::CharIterator CharIterator;
40
41 // this stores the begin iterator and size n, making the loops faster
42 const typename StringPtr::StringSet& ss = strptr.active();
43 size_t n = ss.size();
44 if (n <= 1) return;
45
46 const Iterator begin = ss.begin();
47 Iterator j;
48
49 for (Iterator i = begin + 1; TLX_UNLIKELY(--n != 0); ++i)
50 {
51 String tmp = std::move(ss[i]);
52 j = i;
53
54 while (TLX_LIKELY(j != begin))
55 {
56 CharIterator s = ss.get_chars(ss[j - 1], depth);
57 CharIterator t = ss.get_chars(tmp, depth);
58
59 while (TLX_LIKELY(ss.is_equal(ss[j - 1], s, tmp, t)))
60 ++s, ++t;
61
62 if (TLX_UNLIKELY(ss.is_leq(ss[j - 1], s, tmp, t))) {
63 break;
64 }
65
66 ss[j] = std::move(ss[j - 1]);
67 --j;
68 }
69
70 ss[j] = std::move(tmp);
71 }
72}
73
74/******************************************************************************/
75
76//! LCP insertion sort for abstract string sets. Enabled via SFINAE if
77//! StringPtr::with_lcp is true. This method only requires O(1) additional
78//! memory for sorting n strings, and runs in time O(n^2 + D).
79template <typename StringPtr>
80static inline
82insertion_sort(const StringPtr& strptr, size_t depth, size_t /* memory */) {
83 typedef typename StringPtr::StringSet StringSet;
84 typedef typename StringPtr::LcpType LcpType;
85 typedef typename StringSet::Iterator Iterator;
86 typedef typename StringSet::String String;
87 typedef typename StringSet::CharIterator CharIterator;
88
89 // this stores the begin iterator and size n, making the loops faster
90 const StringSet& ss = strptr.active();
91 size_t n = ss.size();
92 if (n <= 1) return;
93
94 const Iterator begin = ss.begin();
95
96 for (size_t j = 0; j < n - 1; ++j)
97 {
98 // insert strings[j] into sorted strings[0..j-1]
99
100 String new_str = std::move(ss[begin + j]);
101 LcpType new_lcp = depth; // start with LCP depth
102
103 size_t i = j;
104 while (i > 0)
105 {
106 LcpType prev_lcp = new_lcp;
107
108 String cur_str = std::move(ss[begin + i - 1]);
109 LcpType cur_lcp = strptr.get_lcp(i);
110
111 if (cur_lcp < new_lcp)
112 {
113 // CASE 1: lcp goes down -> insert string
114
115 // move comparison string back
116 ss[begin + i - 1] = std::move(cur_str);
117 break;
118 }
119 else if (cur_lcp == new_lcp)
120 {
121 // CASE 2: compare more characters
122
123 CharIterator c1 = ss.get_chars(new_str, new_lcp);
124 CharIterator c2 = ss.get_chars(cur_str, new_lcp);
125
126 while (ss.is_equal(new_str, c1, cur_str, c2))
127 ++c1, ++c2, ++new_lcp;
128
129 // if (new_str >= curr_str) -> insert string
130 if (!ss.is_less(new_str, c1, cur_str, c2))
131 {
132 // update lcp of prev (smaller string) with inserted string
133 strptr.set_lcp(i, new_lcp);
134 // lcp of inserted string with next string
135 new_lcp = prev_lcp;
136
137 // move comparison string back
138 ss[begin + i - 1] = std::move(cur_str);
139 break;
140 }
141 }
142 // else (cur_lcp > new_lcp), CASE 3: nothing to do
143
144 ss[begin + i] = std::move(cur_str);
145 strptr.set_lcp(i + 1, cur_lcp);
146
147 --i;
148 }
149
150 ss[begin + i] = std::move(new_str);
151 strptr.set_lcp(i + 1, new_lcp);
152 }
153
154 // last loop specialized with checks for out-of-bound access to lcp.
155 {
156 size_t j = n - 1;
157
158 // insert strings[j] into sorted strings[0..j-1]
159
160 String new_str = std::move(ss[begin + j]);
161 LcpType new_lcp = depth; // start with LCP depth
162
163 size_t i = j;
164 while (i > 0)
165 {
166 LcpType prev_lcp = new_lcp;
167
168 String cur_str = std::move(ss[begin + i - 1]);
169 LcpType cur_lcp = strptr.get_lcp(i);
170
171 if (cur_lcp < new_lcp)
172 {
173 // CASE 1: lcp goes down -> insert string
174
175 // move comparison string back
176 ss[begin + i - 1] = std::move(cur_str);
177 break;
178 }
179 else if (cur_lcp == new_lcp)
180 {
181 // CASE 2: compare more characters
182
183 CharIterator c1 = ss.get_chars(new_str, new_lcp);
184 CharIterator c2 = ss.get_chars(cur_str, new_lcp);
185
186 while (ss.is_equal(new_str, c1, cur_str, c2))
187 ++c1, ++c2, ++new_lcp;
188
189 // if (new_str >= curr_str) -> insert string
190 if (!ss.is_less(new_str, c1, cur_str, c2))
191 {
192 // update lcp of prev (smaller string) with inserted string
193 strptr.set_lcp(i, new_lcp);
194 // lcp of inserted string with next string
195 new_lcp = prev_lcp;
196
197 // move comparison string back
198 ss[begin + i - 1] = std::move(cur_str);
199 break;
200 }
201 }
202 // else (cur_lcp > new_lcp), CASE 3: nothing to do
203
204 ss[begin + i] = std::move(cur_str);
205 if (i + 1 < n) // check out-of-bounds copy
206 strptr.set_lcp(i + 1, cur_lcp);
207
208 --i;
209 }
210
211 ss[begin + i] = std::move(new_str);
212 if (i + 1 < n) { // check out-of-bounds save
213 strptr.set_lcp(i + 1, new_lcp);
214 }
215 }
216}
217
218/******************************************************************************/
219
220} // namespace sort_strings_detail
221
222//! \}
223
224} // namespace tlx
225
226#endif // !TLX_SORT_STRINGS_INSERTION_SORT_HEADER
227
228/******************************************************************************/
Objectified string array pointer array.
Definition: string_ptr.hpp:48
const StringSet & active() const
return currently active array
Definition: string_ptr.hpp:63
void set_lcp(size_t, const LcpType &) const
set the i-th lcp to v and check its value
Definition: string_ptr.hpp:79
#define TLX_UNLIKELY(c)
Definition: likely.hpp:24
#define TLX_LIKELY(c)
Definition: likely.hpp:23
static enable_if<!StringPtr::with_lcp, void >::type insertion_sort(const StringPtr &strptr, size_t depth, size_t)
Generic insertion sort for abstract string sets.
SFINAE enable_if – copy of std::enable_if<> with less extra cruft.
Definition: enable_if.hpp:22