Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
98e023d
Updated boost.compute to latest develop commit
shehzan10 Mar 30, 2016
8026cdb
Use stable sort for thrust and boost compute
shehzan10 Mar 30, 2016
09129b0
Allow complex types as value type in sort_by_key
shehzan10 Mar 30, 2016
cda059f
Improvements to sort
shehzan10 Mar 31, 2016
e6c9e93
Revert "Updated boost.compute to latest develop commit"
shehzan10 Mar 31, 2016
aaa0a05
Boost.Compute sort/sort_by_key are stable in v0.5. So revert to that
shehzan10 Mar 31, 2016
3422d01
Call modDims when setDataDims is called
shehzan10 Apr 1, 2016
3fea8a2
Fix temp Array T and moddims in CPU batched sort
shehzan10 Apr 1, 2016
03ce51f
Call thrust/compute::sort_by_key instead of kernel::sort_by_key wrapper
shehzan10 Apr 4, 2016
6c47b15
PERF Add batched sort to sort_index for CPU and CUDA
shehzan10 Apr 4, 2016
48dcfb0
Fix ordering of data from sort
shehzan10 Apr 18, 2016
c79b26f
Fix reordering of data for sort_index in cpu and cuda
shehzan10 Apr 19, 2016
08613a8
Added sort_by_key batching to CPU and CUDA
shehzan10 Apr 20, 2016
aaa13f6
Added tests for sort_index and sort_by_key for higher dimensions
shehzan10 Apr 20, 2016
8dad427
Add sort index, sort by key batching to OpenCL
shehzan10 Apr 21, 2016
efdc542
Combine sort_index and sort_by_key kernels in CPU
shehzan10 Apr 21, 2016
460bf6c
Combine sort_index and sort_by_key kernels in CUDA
shehzan10 Apr 21, 2016
363e86e
Combine sort_index and sort_by_key kernels in OpenCL
shehzan10 Apr 21, 2016
87513e0
Fix sort calls from harris and orb in CUDA
shehzan10 Apr 21, 2016
c6e08d5
Fix sort calls from harris and orb in OpenCL
shehzan10 Apr 21, 2016
8f48cdc
Clean up sort tests
shehzan10 Apr 21, 2016
b6a6a87
Fixed sort_by_key kernel for OpenCL
shehzan10 Apr 21, 2016
75b1a6b
Instantiate sort_by_key kernels in separately
shehzan10 Apr 22, 2016
bbdae15
Instantiate sort_by_key kernels in separately in opencl
shehzan10 Apr 22, 2016
cbefc1f
Instantiate sort_by_key kernels in separately in cpu
shehzan10 Apr 22, 2016
45574db
Sort by key cuda - create pair memory using memalloc, reasonable heur…
shehzan10 Apr 22, 2016
840ea28
Remove sort 0 dim restriction note from documentation
shehzan10 Apr 22, 2016
f6eae07
Add multi dimension support to median, tests
shehzan10 Apr 22, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions include/af/algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,6 @@ namespace af
\return the sorted output

\ingroup sort_func_sort

\note \p dim is currently restricted to 0.
*/
AFAPI array sort(const array &in, const unsigned dim = 0, const bool isAscending = true);

Expand All @@ -372,8 +370,6 @@ namespace af
\param[in] isAscending specifies the sorting order

\ingroup sort_func_sort_index

\note \p dim is currently restricted to 0.
*/
AFAPI void sort(array &out, array &indices, const array &in, const unsigned dim = 0,
const bool isAscending = true);
Expand All @@ -388,8 +384,6 @@ namespace af
\param[in] isAscending specifies the sorting order

\ingroup sort_func_sort_keys

\note \p dim is currently restricted to 0.
*/
AFAPI void sort(array &out_keys, array &out_values, const array &keys, const array &values,
const unsigned dim = 0, const bool isAscending = true);
Expand Down Expand Up @@ -794,8 +788,6 @@ extern "C" {
\return \ref AF_SUCCESS if the execution completes properly

\ingroup sort_func_sort

\note \p dim is currently restricted to 0.
*/
AFAPI af_err af_sort(af_array *out, const af_array in, const unsigned dim, const bool isAscending);

Expand All @@ -810,8 +802,6 @@ extern "C" {
\return \ref AF_SUCCESS if the execution completes properly

\ingroup sort_func_sort_index

\note \p dim is currently restricted to 0.
*/
AFAPI af_err af_sort_index(af_array *out, af_array *indices, const af_array in,
const unsigned dim, const bool isAscending);
Expand All @@ -827,8 +817,6 @@ extern "C" {
\return \ref AF_SUCCESS if the execution completes properly

\ingroup sort_func_sort_keys

\note \p dim is currently restricted to 0.
*/
AFAPI af_err af_sort_by_key(af_array *out_keys, af_array *out_values,
const af_array keys, const af_array values,
Expand Down
13 changes: 7 additions & 6 deletions src/api/c/median.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ static af_array median(const af_array& in, const dim_t dim)
const Array<T> input = getArray<T>(in);
Array<T> sortedIn = sort<T, true>(input, dim);

int nElems = input.dims()[0];
double mid = (nElems + 1) / 2;
int dimLength = input.dims()[dim];
double mid = (dimLength + 1) / 2;
af_array left = 0;

af_seq slices[4] = {af_span, af_span, af_span, af_span};
Expand All @@ -78,7 +78,7 @@ static af_array median(const af_array& in, const dim_t dim)
af_array sortedIn_handle = getHandle<T>(sortedIn);
AF_CHECK(af_index(&left, sortedIn_handle, input.ndims(), slices));

if (nElems % 2 == 1) {
if (dimLength % 2 == 1) {
// mid-1 is our guy
if (input.isFloating()) return left;

Expand All @@ -90,7 +90,7 @@ static af_array median(const af_array& in, const dim_t dim)
return out;
} else {
// ((mid-1)+mid)/2 is our guy
dim4 dims = input.dims();
dim4 dims = input.dims();
af_array right = 0;
slices[dim] = af_make_seq(mid, mid, 1.0);

Expand All @@ -100,7 +100,8 @@ static af_array median(const af_array& in, const dim_t dim)
af_array carr = 0;
af_array result = 0;

dim4 cdims = dim4(1, dims[1], dims[2], dims[3]);
dim4 cdims = dims;
cdims[dim] = 1;
AF_CHECK(af_constant(&carr, 0.5, cdims.ndims(), cdims.get(), input.isDouble() ? f64 : f32));

if (!input.isFloating()) {
Expand Down Expand Up @@ -148,7 +149,7 @@ af_err af_median_all(double *realVal, double *imagVal, const af_array in)
af_err af_median(af_array* out, const af_array in, const dim_t dim)
{
try {
ARG_ASSERT(2, (dim>=0 && dim<=0));
ARG_ASSERT(2, (dim >= 0 && dim <= 4));

af_array output = 0;
ArrayInfo info = getInfo(in);
Expand Down
1 change: 0 additions & 1 deletion src/api/c/moddims.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ Array<T> modDims(const Array<T>& in, const af::dim4 &newDims)
Out = copyArray<T>(in);
}

Out.modDims(newDims);
Out.setDataDims(newDims);

return Out;
Expand Down
22 changes: 10 additions & 12 deletions src/api/c/sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ af_err af_sort(af_array *out, const af_array in, const unsigned dim, const bool
af_dtype type = info.getType();

DIM_ASSERT(1, info.elements() > 0);
// Only Dim 0 supported
ARG_ASSERT(2, dim == 0);

af_array val;

Expand Down Expand Up @@ -93,8 +91,6 @@ af_err af_sort_index(af_array *out, af_array *indices, const af_array in, const
af_dtype type = info.getType();

DIM_ASSERT(2, info.elements() > 0);
// Only Dim 0 supported
ARG_ASSERT(3, dim == 0);

af_array val;
af_array idx;
Expand Down Expand Up @@ -150,6 +146,8 @@ void sort_by_key_tmplt(af_array *okey, af_array *oval, const af_array ikey, cons
switch(vtype) {
case f32: sort_by_key<Tk, float >(okey, oval, ikey, ival, dim, isAscending); break;
case f64: sort_by_key<Tk, double >(okey, oval, ikey, ival, dim, isAscending); break;
case c32: sort_by_key<Tk, cfloat >(okey, oval, ikey, ival, dim, isAscending); break;
case c64: sort_by_key<Tk, cdouble>(okey, oval, ikey, ival, dim, isAscending); break;
case s32: sort_by_key<Tk, int >(okey, oval, ikey, ival, dim, isAscending); break;
case u32: sort_by_key<Tk, uint >(okey, oval, ikey, ival, dim, isAscending); break;
case s16: sort_by_key<Tk, short >(okey, oval, ikey, ival, dim, isAscending); break;
Expand All @@ -169,20 +167,20 @@ af_err af_sort_by_key(af_array *out_keys, af_array *out_values,
const unsigned dim, const bool isAscending)
{
try {
ArrayInfo info = getInfo(keys);
af_dtype type = info.getType();
ArrayInfo kinfo = getInfo(keys);
af_dtype ktype = kinfo.getType();

ArrayInfo vinfo = getInfo(values);

DIM_ASSERT(3, info.elements() > 0);
DIM_ASSERT(4, info.dims() == vinfo.dims());
// Only Dim 0 supported
ARG_ASSERT(5, dim == 0);
DIM_ASSERT(3, kinfo.elements() > 0);
DIM_ASSERT(4, kinfo.dims() == vinfo.dims());

TYPE_ASSERT(kinfo.isReal());

af_array oKey;
af_array oVal;

switch(type) {
switch(ktype) {
case f32: sort_by_key_tmplt<float >(&oKey, &oVal, keys, values, dim, isAscending); break;
case f64: sort_by_key_tmplt<double >(&oKey, &oVal, keys, values, dim, isAscending); break;
case s32: sort_by_key_tmplt<int >(&oKey, &oVal, keys, values, dim, isAscending); break;
Expand All @@ -193,7 +191,7 @@ af_err af_sort_by_key(af_array *out_keys, af_array *out_values,
case u64: sort_by_key_tmplt<uintl >(&oKey, &oVal, keys, values, dim, isAscending); break;
case u8: sort_by_key_tmplt<uchar >(&oKey, &oVal, keys, values, dim, isAscending); break;
case b8: sort_by_key_tmplt<char >(&oKey, &oVal, keys, values, dim, isAscending); break;
default: TYPE_ERROR(1, type);
default: TYPE_ERROR(1, ktype);
}
std::swap(*out_keys , oKey);
std::swap(*out_values , oVal);
Expand Down
1 change: 1 addition & 0 deletions src/backend/cpu/Array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ namespace cpu

void setDataDims(const dim4 &new_dims)
{
modDims(new_dims);
data_dims = new_dims;
}

Expand Down
3 changes: 2 additions & 1 deletion src/backend/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ FILE(GLOB cpu_headers
"*.h")

FILE(GLOB cpu_sources
"*.cpp")
"*.cpp"
"kernel/sort_by_key/*.cpp")

LIST(SORT cpu_headers)
LIST(SORT cpu_sources)
Expand Down
2 changes: 1 addition & 1 deletion src/backend/cpu/kernel/sort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace kernel

// Based off of http://stackoverflow.com/a/12399290
template<typename T, bool isAscending>
void sort0(Array<T> val)
void sort0Iterative(Array<T> val)
{
// initialize original index locations
T *val_ptr = val.get();
Expand Down
67 changes: 5 additions & 62 deletions src/backend/cpu/kernel/sort_by_key.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,79 +8,22 @@
********************************************************/

#pragma once
#include <af/defines.h>
#include <Array.hpp>
#include <math.hpp>
#include <algorithm>
#include <numeric>
#include <queue>
#include <err_cpu.hpp>
#include <functional>

namespace cpu
{
namespace kernel
{

template<typename Tk, typename Tv, bool isAscending>
void sort0_by_key(Array<Tk> okey, Array<Tv> oval, Array<uint> oidx,
const Array<Tk> ikey, const Array<Tv> ival)
{
function<bool(Tk, Tk)> op = std::greater<Tk>();
if(isAscending) { op = std::less<Tk>(); }

// Get pointers and initialize original index locations
uint *oidx_ptr = oidx.get();
Tk *okey_ptr = okey.get();
Tv *oval_ptr = oval.get();
const Tk *ikey_ptr = ikey.get();
const Tv *ival_ptr = ival.get();

std::vector<uint> seq_vec(oidx.dims()[0]);
std::iota(seq_vec.begin(), seq_vec.end(), 0);

const Tk *comp_ptr = nullptr;
auto comparator = [&comp_ptr, &op](size_t i1, size_t i2) {return op(comp_ptr[i1], comp_ptr[i2]);};

for(dim_t w = 0; w < ikey.dims()[3]; w++) {
dim_t okeyW = w * okey.strides()[3];
dim_t ovalW = w * oval.strides()[3];
dim_t oidxW = w * oidx.strides()[3];
dim_t ikeyW = w * ikey.strides()[3];
dim_t ivalW = w * ival.strides()[3];

for(dim_t z = 0; z < ikey.dims()[2]; z++) {
dim_t okeyWZ = okeyW + z * okey.strides()[2];
dim_t ovalWZ = ovalW + z * oval.strides()[2];
dim_t oidxWZ = oidxW + z * oidx.strides()[2];
dim_t ikeyWZ = ikeyW + z * ikey.strides()[2];
dim_t ivalWZ = ivalW + z * ival.strides()[2];
void sort0ByKeyIterative(Array<Tk> okey, Array<Tv> oval);

for(dim_t y = 0; y < ikey.dims()[1]; y++) {
template<typename Tk, typename Tv, bool isAscending, int dim>
void sortByKeyBatched(Array<Tk> okey, Array<Tv> oval);

dim_t okeyOffset = okeyWZ + y * okey.strides()[1];
dim_t ovalOffset = ovalWZ + y * oval.strides()[1];
dim_t oidxOffset = oidxWZ + y * oidx.strides()[1];
dim_t ikeyOffset = ikeyWZ + y * ikey.strides()[1];
dim_t ivalOffset = ivalWZ + y * ival.strides()[1];

uint *ptr = oidx_ptr + oidxOffset;
std::copy(seq_vec.begin(), seq_vec.end(), ptr);

comp_ptr = ikey_ptr + ikeyOffset;
std::stable_sort(ptr, ptr + ikey.dims()[0], comparator);

for (dim_t i = 0; i < oval.dims()[0]; ++i){
uint sortIdx = oidx_ptr[oidxOffset + i];
okey_ptr[okeyOffset + i] = ikey_ptr[ikeyOffset + sortIdx];
oval_ptr[ovalOffset + i] = ival_ptr[ivalOffset + sortIdx];
}
}
}
}

return;
}
template<typename Tk, typename Tv, bool isAscending>
void sort0ByKey(Array<Tk> okey, Array<Tv> oval);

}
}
19 changes: 19 additions & 0 deletions src/backend/cpu/kernel/sort_by_key/b8.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*******************************************************
* Copyright (c) 2014, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/

#include <kernel/sort_by_key_impl.hpp>

namespace cpu
{
namespace kernel
{
INSTANTIATE1(char,true)
INSTANTIATE1(char,false)
}
}
19 changes: 19 additions & 0 deletions src/backend/cpu/kernel/sort_by_key/f32.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*******************************************************
* Copyright (c) 2014, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/

#include <kernel/sort_by_key_impl.hpp>

namespace cpu
{
namespace kernel
{
INSTANTIATE1(float,true)
INSTANTIATE1(float,false)
}
}
19 changes: 19 additions & 0 deletions src/backend/cpu/kernel/sort_by_key/f64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*******************************************************
* Copyright (c) 2014, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/

#include <kernel/sort_by_key_impl.hpp>

namespace cpu
{
namespace kernel
{
INSTANTIATE1(double,true)
INSTANTIATE1(double,false)
}
}
19 changes: 19 additions & 0 deletions src/backend/cpu/kernel/sort_by_key/s16.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*******************************************************
* Copyright (c) 2014, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/

#include <kernel/sort_by_key_impl.hpp>

namespace cpu
{
namespace kernel
{
INSTANTIATE1(short,true)
INSTANTIATE1(short,false)
}
}
19 changes: 19 additions & 0 deletions src/backend/cpu/kernel/sort_by_key/s32.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*******************************************************
* Copyright (c) 2014, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/

#include <kernel/sort_by_key_impl.hpp>

namespace cpu
{
namespace kernel
{
INSTANTIATE1(int,true)
INSTANTIATE1(int,false)
}
}
Loading