From f27c01e3fa7d303e17fe68aebe06b95ad9e70ad7 Mon Sep 17 00:00:00 2001 From: Vardan Akopian Date: Tue, 16 Aug 2016 15:08:06 -0700 Subject: [PATCH 1/2] fix initial value for morph --- src/backend/cpu/kernel/morph.hpp | 7 +++++-- src/backend/cuda/kernel/morph.hpp | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/backend/cpu/kernel/morph.hpp b/src/backend/cpu/kernel/morph.hpp index d990bb873b..0589dd222f 100644 --- a/src/backend/cpu/kernel/morph.hpp +++ b/src/backend/cpu/kernel/morph.hpp @@ -8,6 +8,7 @@ ********************************************************/ #pragma once +#include #include #include @@ -37,7 +38,8 @@ void morph(Array out, Array const in, Array const mask) // j steps along 2nd dimension for(dim_t i=0; i::value ? std::numeric_limits::lowest() : -std::numeric_limits::infinity()) + : (std::is_integral::value ? std::numeric_limits::max() : std::numeric_limits::infinity()); // wj,wi steps along 2nd & 1st dimensions of filter window respectively for(dim_t wj=0; wj out, Array const in, Array const mask) // j steps along 2nd dimension for(dim_t i=0; i::value ? std::numeric_limits::lowest() : -std::numeric_limits::infinity()) + : (std::is_integral::value ? std::numeric_limits::max() : std::numeric_limits::infinity()); // wk, wj,wi steps along 2nd & 1st dimensions of filter window respectively for(dim_t wk=0; wk #include #include #include @@ -102,7 +103,8 @@ static __global__ void morphKernel(Param out, CParam in, __syncthreads(); const T * d_filt = (const T *)cFilter; - T acc = shrdMem[ lIdx(i, j, shrdLen, 1) ]; + T acc = isDilation ? (std::is_integral::value ? std::numeric_limits::lowest() : -std::numeric_limits::infinity()) + : (std::is_integral::value ? std::numeric_limits::max() : std::numeric_limits::infinity()); #pragma unroll for(int wj=0; wj out, CParam in, int nBBS) int k = lz + halo; const T * d_filt = (const T *)cFilter; - T acc = shrdMem[ lIdx3D(i, j, k, shrdArea, shrdLen, 1) ]; + T acc = isDilation ? (std::is_integral::value ? std::numeric_limits::lowest() : -std::numeric_limits::infinity()) + : (std::is_integral::value ? std::numeric_limits::max() : std::numeric_limits::infinity()); #pragma unroll for(int wk=0; wk Date: Wed, 17 Aug 2016 13:59:40 -0400 Subject: [PATCH 2/2] Fixing initial values in morph functions for all backends --- src/backend/cpu/kernel/morph.hpp | 11 ++++-- src/backend/cuda/kernel/morph.hpp | 7 ++-- src/backend/opencl/kernel/morph.cl | 4 +- src/backend/opencl/kernel/morph.hpp | 60 ++++++++++++++++------------- 4 files changed, 46 insertions(+), 36 deletions(-) diff --git a/src/backend/cpu/kernel/morph.hpp b/src/backend/cpu/kernel/morph.hpp index 0589dd222f..4cec3b363a 100644 --- a/src/backend/cpu/kernel/morph.hpp +++ b/src/backend/cpu/kernel/morph.hpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace cpu { @@ -31,6 +32,8 @@ void morph(Array out, Array const in, Array const mask) const dim_t R0 = window[0]/2; const dim_t R1 = window[1]/2; + T init = IsDilation ? Binary().init() : Binary().init(); + for(dim_t b3=0; b3 out, Array const in, Array const mask) // j steps along 2nd dimension for(dim_t i=0; i::value ? std::numeric_limits::lowest() : -std::numeric_limits::infinity()) - : (std::is_integral::value ? std::numeric_limits::max() : std::numeric_limits::infinity()); + T filterResult = init; // wj,wi steps along 2nd & 1st dimensions of filter window respectively for(dim_t wj=0; wj out, Array const in, Array const mask) const T* inData = in.get(); const T* filter = mask.get(); + T init = IsDilation ? Binary().init() : Binary().init(); + for(dim_t batchId=0; batchId out, Array const in, Array const mask) // j steps along 2nd dimension for(dim_t i=0; i::value ? std::numeric_limits::lowest() : -std::numeric_limits::infinity()) - : (std::is_integral::value ? std::numeric_limits::max() : std::numeric_limits::infinity()); + T filterResult = init; // wk, wj,wi steps along 2nd & 1st dimensions of filter window respectively for(dim_t wk=0; wk #include #include +#include #include "shared.hpp" namespace cuda @@ -103,8 +104,7 @@ static __global__ void morphKernel(Param out, CParam in, __syncthreads(); const T * d_filt = (const T *)cFilter; - T acc = isDilation ? (std::is_integral::value ? std::numeric_limits::lowest() : -std::numeric_limits::infinity()) - : (std::is_integral::value ? std::numeric_limits::max() : std::numeric_limits::infinity()); + T acc = isDilation ? Binary().init() : Binary().init(); #pragma unroll for(int wj=0; wj out, CParam in, int nBBS) int k = lz + halo; const T * d_filt = (const T *)cFilter; - T acc = isDilation ? (std::is_integral::value ? std::numeric_limits::lowest() : -std::numeric_limits::infinity()) - : (std::is_integral::value ? std::numeric_limits::max() : std::numeric_limits::infinity()); + T acc = isDilation ? Binary().init() : Binary().init(); #pragma unroll for(int wk=0; wk #include #include +#include +#include using cl::Buffer; using cl::Program; @@ -54,19 +56,22 @@ void morph(Param out, int device = getActiveDeviceId(); std::call_once( compileFlags[device], [device] () { - std::ostringstream options; - options << " -D T=" << dtype_traits::getName() - << " -D isDilation="<< isDilation - << " -D windLen=" << windLen; - if (std::is_same::value || - std::is_same::value) { - options << " -D USE_DOUBLE"; - } - Program prog; - buildProgram(prog, morph_cl, morph_cl_len, options.str()); - morProgs[device] = new Program(prog); - morKernels[device] = new Kernel(*morProgs[device], "morph"); - }); + ToNum toNum; + T init = isDilation ? Binary().init() : Binary().init(); + std::ostringstream options; + options << " -D T=" << dtype_traits::getName() + << " -D isDilation="<< isDilation + << " -D init=" << toNum(init) + << " -D windLen=" << windLen; + if (std::is_same::value || + std::is_same::value) { + options << " -D USE_DOUBLE"; + } + Program prog; + buildProgram(prog, morph_cl, morph_cl_len, options.str()); + morProgs[device] = new Program(prog); + morKernels[device] = new Kernel(*morProgs[device], "morph"); + }); auto morphOp = KernelFunctor toNum; + T init = isDilation ? Binary().init() : Binary().init(); + std::ostringstream options; + options << " -D T=" << dtype_traits::getName() + << " -D isDilation="<< isDilation + << " -D init=" << toNum(init) + << " -D windLen=" << windLen; + if (std::is_same::value || + std::is_same::value) { + options << " -D USE_DOUBLE"; + } + Program prog; + buildProgram(prog, morph_cl, morph_cl_len, options.str()); + morProgs[device] = new Program(prog); + morKernels[device] = new Kernel(*morProgs[device], "morph3d"); + }); auto morphOp = KernelFunctor