Skip to content

Commit e96facf

Browse files
committed
Implement Hybrid SampleSort, like quicksort but choosing many pivots
1 parent 8c32d2d commit e96facf

File tree

2 files changed

+272
-209
lines changed

2 files changed

+272
-209
lines changed

src/sorts/SampleSort.java

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
package sorts;
2+
3+
import templates.Sort;
4+
import utils.Delays;
5+
import utils.Highlights;
6+
import utils.Reads;
7+
import utils.Writes;
8+
9+
/* Timsort's predecessor in CPython, copied from
10+
* https://github.com/python/cpython/blob/56796f672fb571d80199cf08aa059db9df55257b/Objects/listobject.c
11+
* Mostly written by Tim Peters and Guido van Rossum
12+
*
13+
* This Hybrid SampleSort is very similar to quicksort, except instead
14+
* of selecting just one pivot, we randomly select MANY pivots,
15+
* recursively sort them, and then partition the rest of the elements
16+
* between those pivots, finally then sorting each bucket recursively.
17+
*
18+
* How many is "MANY"?
19+
* For 43--105 elements, use 15 pivots
20+
* For 106--250 elements, use 31 pivots
21+
* For 250--576 elements, use 63 pivots
22+
* For 576--1297 elements, use 127 pivots
23+
* ...
24+
* For n elements, use 1 less than a power of 2 close to n/ln(n)
25+
*
26+
*/
27+
28+
29+
public class SampleSort extends Sort {
30+
public SampleSort(Delays delayOps, Highlights markOps, Reads readOps, Writes writeOps) {
31+
super(delayOps, markOps, readOps, writeOps);
32+
33+
this.setSortPromptID("Sample");
34+
this.setRunAllID("Sample Sort");
35+
this.setReportSortID("SampleSort");
36+
this.setCategory("Hybrid Sorts");
37+
this.isComparisonBased(true);
38+
this.isBucketSort(false);
39+
this.isRadixSort(false);
40+
this.isUnreasonablySlow(false);
41+
this.setUnreasonableLimit(0);
42+
this.isBogoSort(false);
43+
}
44+
45+
private static int MINSIZE = 100;
46+
private static int MINPARTITIONSIZE = 40;
47+
private static int STACKSIZE = 60;
48+
49+
private static double READ_DELAY = 0.02;
50+
private static double FAST_READ_DELAY = 0.01;
51+
private static double ASSIGN_DELAY = 0.2;
52+
private static double SWAP_DELAY = 0.3;
53+
private static double FAST_ASSIGN_DELAY = 0.05;
54+
private static double FAST_SWAP_DELAY = 0.07;
55+
private static double COMPARE_DELAY = 0.2;
56+
private static double STACK_DELAY = 0.5;
57+
58+
private boolean less_than(int left, int right) {
59+
Delays.sleep(COMPARE_DELAY);
60+
return Reads.compare(left, right) < 0;
61+
}
62+
63+
private int get(int[] arr, int i, double delay) {
64+
Highlights.markArray(1, i);
65+
Delays.sleep(delay);
66+
return arr[i];
67+
}
68+
69+
public void binary_sort(int[] arr, int lo, int hi, int start) {
70+
int l, p, r, pivot;
71+
if (lo == start) {
72+
start++;
73+
}
74+
for (; start < hi; start++) {
75+
l = lo;
76+
r = start;
77+
pivot = get(arr, r, READ_DELAY);
78+
assert l < r;
79+
do {
80+
p = l + ((r - l) >> 1);
81+
if (less_than(pivot, get(arr, p, READ_DELAY))) {
82+
r = p;
83+
} else {
84+
l = p + 1;
85+
}
86+
} while (l < r);
87+
assert l == r;
88+
for (p = start; p > l; p--) {
89+
Writes.write(arr, p, arr[p-1], FAST_ASSIGN_DELAY, true, false);
90+
}
91+
Writes.write(arr, l, pivot, ASSIGN_DELAY, true, false);
92+
}
93+
}
94+
95+
private static int CUTOFFBASE = 4;
96+
private static long cutoff[] = {
97+
43, /* smallest N where we use 2**4-1=15 pivots*/
98+
106, /* etc */
99+
250,
100+
576,
101+
1298,
102+
2885,
103+
6339,
104+
13805,
105+
29843,
106+
64116,
107+
137030,
108+
291554,
109+
617916,
110+
1305130,
111+
2748295,
112+
5771662,
113+
12091672,
114+
25276798,
115+
52734615,
116+
109820537,
117+
228324027,
118+
473977813,
119+
982548444, /* smallest N such that k == 26 */
120+
2034159050 /* largest N that fits in signed 32-bit; k == 27 */
121+
};
122+
private int get_num_samples(int n) {
123+
int k;
124+
for (k = 0; k < cutoff.length; k++) {
125+
if (n < cutoff[k]) {
126+
break;
127+
}
128+
}
129+
return (1 << (k - 1 + CUTOFFBASE)) - 1;
130+
}
131+
132+
private void sample_sort(int[] arr, int lo, int hi) {
133+
int l, r, pivot, k, n, extra, top;
134+
boolean extraOnRight;
135+
int[][] stack = new int[STACKSIZE][3];
136+
n = hi - lo;
137+
if (n < MINSIZE) {
138+
binary_sort(arr, lo, hi, lo);
139+
return;
140+
}
141+
extra = get_num_samples(n);
142+
{
143+
int seed = n / extra; /* arbitrary */
144+
for (int i = 0; i < extra; i++) {
145+
seed = seed * 69069 + 7;
146+
int j = i + seed % (n - i);
147+
if (j < i) {
148+
j += n - i;
149+
}
150+
Writes.swap(arr, i, j, SWAP_DELAY, true, false);
151+
}
152+
}
153+
sample_sort(arr, lo, lo + extra);
154+
top = 0;
155+
lo += extra;
156+
extraOnRight = false;
157+
for (;;) {
158+
assert lo <= hi;
159+
n = hi - lo;
160+
if (n < MINPARTITIONSIZE || extra == 0) {
161+
if (n >= MINSIZE) {
162+
assert extra == 0;
163+
sample_sort(arr, lo, hi);
164+
} else {
165+
if (extraOnRight && extra != 0) {
166+
k = extra;
167+
do {
168+
Writes.swap(arr, lo, hi, FAST_SWAP_DELAY, true, false);
169+
++lo; ++hi;
170+
} while (--k > 0);
171+
}
172+
binary_sort(arr, lo - extra, hi, lo);
173+
}
174+
if (--top < 0) {
175+
break;
176+
}
177+
lo = stack[top][0];
178+
hi = stack[top][1];
179+
extra = stack[top][2];
180+
extraOnRight = false;
181+
if (extra < 0) {
182+
extra = -extra;
183+
extraOnRight = true;
184+
}
185+
continue;
186+
}
187+
k = extra >>= 1;
188+
if (extraOnRight) {
189+
do {
190+
Writes.swap(arr, lo, hi, FAST_SWAP_DELAY, true , false);
191+
++lo; ++hi;
192+
} while (k-- != 0);
193+
} else {
194+
while (k-- != 0) {
195+
--lo; --hi;
196+
Writes.swap(arr, lo, hi, FAST_SWAP_DELAY, true, false);
197+
}
198+
}
199+
--lo;
200+
pivot = get(arr, lo, READ_DELAY);
201+
202+
l = lo + 1;
203+
r = hi - 1;
204+
assert lo < l && l < r && r < hi;
205+
do {
206+
do {
207+
if (less_than(get(arr, l, FAST_READ_DELAY), pivot)) {
208+
++l;
209+
} else {
210+
break;
211+
}
212+
} while (l < r);
213+
214+
while (l < r) {
215+
int rval = get(arr, r--, FAST_READ_DELAY);
216+
if (less_than(rval, pivot)) {
217+
Writes.write(arr, r+1, arr[l], ASSIGN_DELAY, true, false);
218+
Writes.write(arr, l++, rval, ASSIGN_DELAY, true, false);
219+
break;
220+
}
221+
}
222+
} while (l < r);
223+
224+
assert lo < r && r <= l && l < hi;
225+
if (l == r) {
226+
if (less_than(get(arr, r, READ_DELAY), pivot)) {
227+
++l;
228+
} else {
229+
--r;
230+
}
231+
}
232+
assert(lo <= r && r+1 == l && l <= hi);
233+
assert(arr[lo] == pivot);
234+
Writes.write(arr, lo, get(arr, r, READ_DELAY), 0.01, true, false);
235+
Writes.write(arr, r, pivot, 0.01, true, false);
236+
237+
while (l < hi) {
238+
if (less_than(pivot, get(arr, l, FAST_READ_DELAY))) {
239+
break;
240+
} else {
241+
++l;
242+
}
243+
}
244+
245+
assert(lo <= r && r < l && l <= hi);
246+
assert(top < STACKSIZE);
247+
if (r - lo <= hi - l) {
248+
/* second is bigger */
249+
stack[top][0] = l;
250+
stack[top][1] = hi;
251+
stack[top][2] = -extra;
252+
hi = r;
253+
extraOnRight = false;
254+
}
255+
else {
256+
/* first is bigger */
257+
stack[top][0] = lo;
258+
stack[top][1] = r;
259+
stack[top][2] = extra;
260+
lo = l;
261+
extraOnRight = true;
262+
}
263+
Delays.sleep(STACK_DELAY);
264+
++top;
265+
}
266+
}
267+
268+
@Override
269+
public void runSort(int[] array, int currentLength, int bucketCount) {
270+
sample_sort(array, 0, currentLength);
271+
}
272+
}

0 commit comments

Comments
 (0)