Skip to content

Commit d3b8d0f

Browse files
Merge pull request livecode#2496 from runrevmark/feature-faster_sort
[[ Perf ]] Improve sort speed
2 parents 8c2be48 + cf7bbff commit d3b8d0f

3 files changed

Lines changed: 431 additions & 70 deletions

File tree

engine/src/exec-strings.cpp

Lines changed: 351 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ along with LiveCode. If not see <http://www.gnu.org/licenses/>. */
2626
#include "handler.h"
2727
#include "variable.h"
2828
#include "hndlrlst.h"
29+
#include "osspec.h"
2930

3031
#include "scriptpt.h"
3132
#include "util.h"
@@ -2374,7 +2375,7 @@ void MCStringsSortAddItem(MCExecContext &ctxt, MCSortnode *items, uint4 &nitems,
23742375
nitems++;
23752376
}
23762377

2377-
void MCStringsExecSort(MCExecContext& ctxt, Sort_type p_dir, Sort_type p_form, MCStringRef *p_strings_array, uindex_t p_count, MCExpression *p_by, MCStringRef*& r_sorted_array, uindex_t& r_sorted_count)
2378+
void MCStringsExecSortOld(MCExecContext& ctxt, Sort_type p_dir, Sort_type p_form, MCStringRef *p_strings_array, uindex_t p_count, MCExpression *p_by, MCStringRef*& r_sorted_array, uindex_t& r_sorted_count)
23782379
{
23792380
// OK-2008-12-11: [[Bug 7503]] - If there are 0 items in the string, don't carry out the search,
23802381
// this keeps the behavior consistent with previous versions of Revolution.
@@ -2411,6 +2412,355 @@ void MCStringsExecSort(MCExecContext& ctxt, Sort_type p_dir, Sort_type p_form, M
24112412

24122413
////////////////////////////////////////////////////////////////////////////////
24132414

2415+
typedef bool (*comparator_t)(void *context, uindex_t left, uindex_t right);
2416+
typedef void (*freer_t)(void *keys, uindex_t count);
2417+
2418+
static void MCStringsDoSortIndirect(uindex_t *b, uint4 n, uindex_t *t, comparator_t is_less_or_equal, void *context)
2419+
{
2420+
if (n <= 1)
2421+
return;
2422+
2423+
uint4 n1 = n / 2;
2424+
uint4 n2 = n - n1;
2425+
uindex_t *b1 = b;
2426+
uindex_t *b2 = b + n1;
2427+
2428+
MCStringsDoSortIndirect(b1, n1, t, is_less_or_equal, context);
2429+
MCStringsDoSortIndirect(b2, n2, t, is_less_or_equal, context);
2430+
2431+
uindex_t *tmp = t;
2432+
while (n1 > 0 && n2 > 0)
2433+
{
2434+
if (is_less_or_equal(context, *b1, *b2))
2435+
{
2436+
*tmp++ = *b1++;
2437+
n1--;
2438+
}
2439+
else
2440+
{
2441+
*tmp++ = *b2++;
2442+
n2--;
2443+
}
2444+
}
2445+
2446+
for (uindex_t i = 0; i < n1; i++)
2447+
tmp[i] = b1[i];
2448+
for (uindex_t i = 0; i < (n - n2); i++)
2449+
b[i] = t[i];
2450+
}
2451+
2452+
static void MCStringsSortIndirect(uindex_t *p_items, uint4 nitems, comparator_t is_less_or_equal, void *context)
2453+
{
2454+
if (nitems == 0)
2455+
return;
2456+
2457+
uindex_t *tmp = new uindex_t[nitems];
2458+
MCStringsDoSortIndirect(p_items, nitems, tmp, is_less_or_equal, context);
2459+
delete[] tmp;
2460+
}
2461+
2462+
static bool double_comparator(void *p_context, uindex_t p_left, uindex_t p_right)
2463+
{
2464+
double *t_keys;
2465+
t_keys = (double *)p_context;
2466+
return t_keys[p_left] <= t_keys[p_right];
2467+
}
2468+
2469+
static void double_freer(void *keys, uindex_t count)
2470+
{
2471+
double *t_doubles;
2472+
t_doubles = (double *)keys;
2473+
delete[] t_doubles;
2474+
}
2475+
2476+
static bool data_comparator(void *p_context, uindex_t p_left, uindex_t p_right)
2477+
{
2478+
MCDataRef *t_keys;
2479+
t_keys = (MCDataRef *)p_context;
2480+
return MCDataCompareTo(t_keys[p_left], t_keys[p_right]) <= 0;
2481+
}
2482+
2483+
static bool string_comparator(void *p_context, uindex_t p_left, uindex_t p_right)
2484+
{
2485+
MCStringRef *t_keys;
2486+
t_keys = (MCStringRef *)p_context;
2487+
return MCStringCompareTo(t_keys[p_left], t_keys[p_right], kMCStringOptionCompareExact) <= 0;
2488+
}
2489+
2490+
2491+
static void valueref_freer(void *keys, uindex_t count)
2492+
{
2493+
MCValueRef *t_values;
2494+
t_values = (MCValueRef *)keys;
2495+
for(uindex_t i = 0; i < count; i++)
2496+
MCValueRelease(t_values[i]);
2497+
delete[] t_values;
2498+
}
2499+
2500+
static bool MCStringCopyFoldedAndRelease(MCStringRef p_string, MCStringOptions p_options, MCStringRef& r_folded_string)
2501+
{
2502+
if (p_options == kMCStringOptionCompareExact)
2503+
{
2504+
r_folded_string = p_string;
2505+
return true;
2506+
}
2507+
2508+
if (!MCStringMutableCopyAndRelease(p_string, p_string))
2509+
return false;
2510+
2511+
if (!MCStringFold(p_string, p_options))
2512+
return false;
2513+
2514+
if (!MCStringCopyAndRelease(p_string, p_string))
2515+
return false;
2516+
2517+
r_folded_string = p_string;
2518+
2519+
return true;
2520+
}
2521+
2522+
void MCStringsExecSort(MCExecContext& ctxt, Sort_type p_dir, Sort_type p_form, MCStringRef *p_items, uindex_t p_count, MCExpression *p_by, MCStringRef*& r_sorted_array, uindex_t& r_sorted_count)
2523+
{
2524+
// If there are no items to sort, do nothing.
2525+
if (p_count == 0)
2526+
return;
2527+
2528+
// Indicates if all items are stringrefs.
2529+
bool t_all_strings;
2530+
t_all_strings = true;
2531+
2532+
// Process the items if there is a 'by'.
2533+
MCValueRef *t_temp_items;
2534+
MCValueRef *t_items;
2535+
t_temp_items = nil;
2536+
if (p_by != nil)
2537+
{
2538+
t_temp_items = new MCValueRef[p_count];
2539+
MCerrorlock++;
2540+
for(uindex_t i = 0; i < p_count; i++)
2541+
{
2542+
MCeach -> set(ctxt, p_items[i]);
2543+
if (!ctxt . EvalExprAsValueRef(p_by, EE_UNDEFINED, t_temp_items[i]))
2544+
t_temp_items[i] = MCValueRetain(p_items[i]);
2545+
if (MCValueGetTypeCode(t_temp_items[i]) != kMCValueTypeCodeString)
2546+
t_all_strings = false;
2547+
}
2548+
t_items = t_temp_items;
2549+
}
2550+
else
2551+
t_items = (MCValueRef *)p_items;
2552+
2553+
// Build the vector of indicies to sort.
2554+
uindex_t *t_indicies;
2555+
t_indicies = new uindex_t[p_count];
2556+
for(uindex_t i = 0; i < p_count; i++)
2557+
t_indicies[i] = i;
2558+
2559+
// Now generate the sort keys - what type these are will depend on the
2560+
// type of sort.
2561+
void *t_sort_keys;
2562+
comparator_t t_sort_compare;
2563+
freer_t t_sort_freer;
2564+
2565+
switch(p_form)
2566+
{
2567+
case ST_DATETIME:
2568+
{
2569+
// DateTime is sorted by seconds.
2570+
double *t_seconds;
2571+
t_seconds = new double[p_count];
2572+
for(uindex_t i = 0; i < p_count; i++)
2573+
{
2574+
MCDateTime t_datetime;
2575+
if (!MCD_convert_to_datetime(ctxt, t_items[i], CF_UNDEFINED, CF_UNDEFINED, t_datetime) ||
2576+
!MCS_datetimetoseconds(t_datetime, t_seconds[i]))
2577+
t_seconds[i] = -MAXREAL8;
2578+
}
2579+
2580+
t_sort_keys = t_seconds;
2581+
t_sort_compare = double_comparator;
2582+
t_sort_freer = double_freer;
2583+
}
2584+
break;
2585+
2586+
case ST_NUMERIC:
2587+
{
2588+
double *t_numbers;
2589+
t_numbers = new double[p_count];
2590+
for(uindex_t i = 0; i < p_count; i++)
2591+
{
2592+
if (MCValueIsEmpty(t_items[i]))
2593+
{
2594+
t_numbers[i] = -MAXREAL8;
2595+
continue;
2596+
}
2597+
2598+
if (!ctxt . ConvertToReal(t_items[i], t_numbers[i]))
2599+
{
2600+
MCAutoStringRef t_string;
2601+
if (!ctxt . ConvertToString(t_items[i], &t_string))
2602+
{
2603+
t_numbers[i] = -MAXREAL8;
2604+
continue;
2605+
}
2606+
2607+
uindex_t t_start, t_end, t_length;
2608+
t_length = MCStringGetLength(*t_string);
2609+
t_start = 0;
2610+
2611+
// if there are consecutive spaces at the beginning, skip them
2612+
while (t_start < t_length && MCUnicodeIsWhitespace(MCStringGetCharAtIndex(*t_string, t_start)))
2613+
t_start++;
2614+
2615+
t_end = t_start;
2616+
while (t_end < t_length)
2617+
{
2618+
char_t t_char = MCStringGetNativeCharAtIndex(*t_string, t_end);
2619+
if (!isdigit((uint1)t_char) && t_char != '.' && t_char != '-' && t_char != '+')
2620+
break;
2621+
2622+
t_end++;
2623+
}
2624+
2625+
MCAutoStringRef t_numeric_part;
2626+
if (t_end == t_start ||
2627+
!MCStringCopySubstring(*t_string, MCRangeMake(t_start, t_end - t_start), &t_numeric_part) ||
2628+
!ctxt . ConvertToReal(*t_numeric_part, t_numbers[i]))
2629+
{
2630+
t_numbers[i] = -MAXREAL8;
2631+
continue;
2632+
}
2633+
}
2634+
}
2635+
2636+
t_sort_keys = t_numbers;
2637+
t_sort_compare = double_comparator;
2638+
t_sort_freer = double_freer;
2639+
}
2640+
break;
2641+
2642+
case ST_BINARY:
2643+
{
2644+
MCDataRef *t_datas;
2645+
t_datas = new MCDataRef[p_count];
2646+
for(uindex_t i = 0; i < p_count; i++)
2647+
{
2648+
if (!ctxt . ConvertToData(t_items[i], t_datas[i]))
2649+
t_datas[i] = MCValueRetain(kMCEmptyData);
2650+
}
2651+
2652+
t_sort_keys = t_datas;
2653+
t_sort_compare = data_comparator;
2654+
t_sort_freer = valueref_freer;
2655+
}
2656+
break;
2657+
2658+
case ST_TEXT:
2659+
{
2660+
MCStringOptions t_options;
2661+
t_options = ctxt . GetStringComparisonType();
2662+
if (t_options == kMCStringOptionCompareExact &&
2663+
t_all_strings)
2664+
{
2665+
t_sort_keys = t_items;
2666+
t_sort_compare = string_comparator;
2667+
t_sort_freer = nil;
2668+
}
2669+
else
2670+
{
2671+
MCStringRef *t_strings;
2672+
t_strings = new MCStringRef[p_count];
2673+
for(uindex_t i = 0; i < p_count; i++)
2674+
{
2675+
if (!ctxt . ConvertToString(t_items[i], t_strings[i]) ||
2676+
!MCStringCopyFoldedAndRelease(t_strings[i], t_options, t_strings[i]))
2677+
t_strings[i] = MCValueRetain(kMCEmptyString);
2678+
}
2679+
2680+
t_sort_keys = t_strings;
2681+
t_sort_compare = string_comparator;
2682+
t_sort_freer = valueref_freer;
2683+
}
2684+
}
2685+
break;
2686+
2687+
case ST_INTERNATIONAL:
2688+
{
2689+
MCUnicodeCollateOption t_options;
2690+
t_options = MCUnicodeCollateOptionFromCompareOption((MCUnicodeCompareOption)ctxt . GetStringComparisonType());
2691+
2692+
MCUnicodeCollatorRef t_collator;
2693+
/* UNCHECKED */ MCUnicodeCreateCollator(kMCSystemLocale, t_options, t_collator);
2694+
2695+
MCDataRef *t_datas;
2696+
t_datas = new MCDataRef[p_count];
2697+
for(uindex_t i = 0; i < p_count; i++)
2698+
{
2699+
MCAutoStringRef t_string;
2700+
if (!ctxt . ConvertToString(t_items[i], &t_string))
2701+
{
2702+
t_datas[i] = MCValueRetain(kMCEmptyData);
2703+
continue;
2704+
}
2705+
2706+
byte_t *t_key;
2707+
uindex_t t_key_length;
2708+
if (!MCUnicodeCreateSortKeyWithCollator(t_collator, MCStringGetCharPtr(*t_string), MCStringGetLength(*t_string), t_key, t_key_length))
2709+
{
2710+
t_datas[i] = MCValueRetain(kMCEmptyData);
2711+
continue;
2712+
}
2713+
2714+
if (!MCDataCreateWithBytesAndRelease(t_key, t_key_length, t_datas[i]))
2715+
{
2716+
free(t_key);
2717+
t_datas[i] = MCValueRetain(kMCEmptyData);
2718+
continue;
2719+
}
2720+
}
2721+
2722+
MCUnicodeDestroyCollator(t_collator);
2723+
2724+
t_sort_keys = t_datas;
2725+
t_sort_compare = data_comparator;
2726+
t_sort_freer = valueref_freer;
2727+
}
2728+
break;
2729+
2730+
default:
2731+
MCUnreachable();
2732+
break;
2733+
}
2734+
2735+
MCStringsSortIndirect(t_indicies, p_count, t_sort_compare, t_sort_keys);
2736+
2737+
t_sort_freer(t_sort_keys, p_count);
2738+
2739+
if (t_temp_items != nil)
2740+
{
2741+
for(uindex_t i = 0; i < p_count; i++)
2742+
MCValueRelease(t_temp_items[i]);
2743+
delete[] t_temp_items;
2744+
}
2745+
2746+
MCAutoArray<MCStringRef> t_sorted;
2747+
if (p_dir == ST_ASCENDING)
2748+
{
2749+
for (uindex_t i = 0; i < p_count; i++)
2750+
t_sorted . Push((MCStringRef)p_items[t_indicies[i]]);
2751+
}
2752+
else
2753+
{
2754+
for (uindex_t i = p_count; i > 0; i--)
2755+
t_sorted . Push((MCStringRef)p_items[t_indicies[i - 1]]);
2756+
}
2757+
t_sorted . Take(r_sorted_array, r_sorted_count);
2758+
2759+
delete[] t_indicies;
2760+
}
2761+
2762+
////////////////////////////////////////////////////////////////////////////////
2763+
24142764
// AL-2014-10-17: [[ BiDi ]] Returns the result of applying the bi-directional algorithm to text
24152765
void MCStringsEvalBidiDirection(MCExecContext& ctxt, MCStringRef p_string, MCStringRef& r_result)
24162766
{

libfoundation/include/foundation-unicode.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,25 @@ bool MCUnicodeCreateSortKey(MCLocaleRef, MCUnicodeCollateOption,
463463
const unichar_t* p_in, uindex_t p_in_length,
464464
byte_t* &r_out, uindex_t &r_out_length);
465465

466+
/////
467+
468+
// The collator reference - this is not a valueref!
469+
typedef void *MCUnicodeCollatorRef;
470+
471+
// Creates a collation object which can be reused.
472+
bool MCUnicodeCreateCollator(MCLocaleRef locale, MCUnicodeCollateOption options, MCUnicodeCollatorRef& r_collator);
473+
474+
// Destroys a previously create collator.
475+
void MCUnicodeDestroyCollator(MCUnicodeCollatorRef collator);
476+
477+
int32_t MCUnicodeCollateWithCollator(MCUnicodeCollatorRef collator,
478+
const unichar_t* p_first, uindex_t p_first_len,
479+
const unichar_t* p_second, uindex_t p_second_len);
480+
481+
// Create a sort key using the given collator.
482+
bool MCUnicodeCreateSortKeyWithCollator(MCUnicodeCollatorRef collator,
483+
const unichar_t* p_in, uindex_t p_in_length,
484+
byte_t* &r_out, uindex_t &r_out_length);
466485

467486
////////////////////////////////////////////////////////////////////////////////
468487

0 commit comments

Comments
 (0)