Convert CUDA JIT to use nvrtc instead of nvvm#1836
Conversation
umar456
left a comment
There was a problem hiding this comment.
Looks great. I added a couple of minor comments.
src/backend/cuda/Array.cpp
Outdated
| { | ||
| Node_ptr node(reinterpret_cast<Node *>(new BufferNode<T>(irname<T>(), afShortName<T>()))); | ||
| return node; | ||
| return Node_ptr(reinterpret_cast<Node *>(new BufferNode<T>(getFullName<T>(), |
There was a problem hiding this comment.
I am pretty sure you can just do this:
return Node_ptr(new BufferNode<T>(getFullName<T>(), shortname<T>(true)));| @@ -3,10 +3,6 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8) | |||
| FIND_PACKAGE(CUDA 7.0 REQUIRED) | |||
|
|
|||
| INCLUDE(CLKernelToH) | |||
There was a problem hiding this comment.
We should change the name of this guy at some point.
There was a problem hiding this comment.
I am not doing it now :(
| kerStream << std::setw(2) << std::setfill('0') << std::hex << ids.child_ids[0]; | ||
| kerStream << std::setw(2) << std::setfill('0') << std::hex << ids.child_ids[1]; | ||
| kerStream << std::setw(2) << std::setfill('0') << std::hex << ids.id << std::dec; | ||
| // Make the dec representation of enum part of the Kernel name |
There was a problem hiding this comment.
These aren't templated so it would be better if they were implemented in the cpp file.
There was a problem hiding this comment.
I'll do another pass about reorganizing the JIT nodes at a later point (including trying to use the same code for CUDA and OpenCL JIT). Can we leave this be for now?
src/backend/cuda/JIT/BufferNode.hpp
Outdated
| std::shared_ptr<T> m_data; | ||
| Param<T> m_param; | ||
| unsigned m_bytes; | ||
| bool m_linear_buffer; |
There was a problem hiding this comment.
Since bool is most likely smaller than once_flag it would be better to place it after the m_set_data_flag variable. It would be good to look at the other variables while you are at it.
src/backend/cuda/JIT/BufferNode.hpp
Outdated
|
|
||
| bool isBuffer() { return true; } | ||
|
|
||
| ~BufferNode() |
There was a problem hiding this comment.
Is the destructor necessary? You aren't really doing anything in here.
src/backend/cuda/jit.cpp
Outdated
| "br i1 %cmp0, label %core, label %end\n"; | ||
| const char *includeFileStr = jit_cuh; | ||
|
|
||
| const char paramTStr[] ="" |
There was a problem hiding this comment.
Since we are using C++11 now, raw string literals would be easier to read.
| } else { | ||
| for (int i = 0; i < 4; i++) args.push_back((void *)(strides + i)); | ||
| for (int i = 0; i < 4; i++) args.push_back((void *)(dims + i)); | ||
| args.push_back((void *)&outputs[i]); |
There was a problem hiding this comment.
Looks like you know the size of the args vector. Could yo call the reserve function?
There was a problem hiding this comment.
We don't know the size of the args vector. It depends on what each type of the node is.
src/backend/cuda/jit.cpp
Outdated
| NVRTC_CHECK(nvrtcCreateProgram(&prog, jit_ker.c_str(), ker_name, 0, NULL, NULL)); | ||
| NVRTC_CHECK(nvrtcCompileProgram(prog, 0, NULL)); | ||
| NVRTC_CHECK(nvrtcGetPTXSize(prog, ptx_size)); | ||
| unique_ptr<char []> ptx(new char[*ptx_size]); |
src/backend/cuda/jit.cpp
Outdated
| kerStream << triple32; | ||
| outParamStream << "Param<" << full_nodes[id]->getTypeStr() << "> out" << id << ", \n"; | ||
| // Generate code to write the output | ||
| outWriteStream << "out" << id << ".ptr[idx] = " << "val" << id << ";\n"; |
There was a problem hiding this comment.
outWriteStream << "out" << id << ".ptr[idx] = val" << id << ";\n";
src/backend/cuda/jit.cpp
Outdated
| #include <stdexcept> | ||
| #include <thread> | ||
| #include <vector> | ||
| #include <kernel_headers/jit.hpp> |
There was a problem hiding this comment.
Could you separate the standard headers from the others?
|
Windows builds were failing because of a Windows macro definition. Fix #1839 should resolve this issue. Please rebase and test again. |
FloopCZ
left a comment
There was a problem hiding this comment.
That is a lot of great code.
Most of my review comments are of the OCD type, so feel free to ignore if you don't suffer from this terrible disorder. ;-)
src/backend/cuda/jit.cpp
Outdated
| unique_ptr<char []> log(new char[logSize]); \ | ||
| char *logptr = log.get(); \ | ||
| nvrtcGetProgramLog(prog, logptr); \ | ||
| logptr[logSize] = '\x0'; \ |
There was a problem hiding this comment.
I believe this write is one-past-end.
src/backend/cuda/types.hpp
Outdated
| template<typename T, bool binary> const std::string cuMangledName(const char *fn); | ||
| template<typename T > const char *afShortName(bool caps = true); | ||
| template<typename T > const char *irname(); | ||
| template<typename T > const char *shortname(bool caps = true); |
There was a problem hiding this comment.
OCD - extra space after typename T
src/backend/cuda/cast.hpp
Outdated
| } | ||
| }; | ||
|
|
||
|
|
There was a problem hiding this comment.
extra newline here and L65 and L60
(Nitpicking, but since the code looks so good and you already edit the line, it deserves to be perfect... ;-))
src/backend/cuda/JIT/BufferNode.hpp
Outdated
| if (is_linear) { | ||
| kerStream << idx_str << " = idx;\n"; | ||
| } else { | ||
| std::string info_str = std::string("in") + std::to_string(id);; |
| @@ -158,21 +123,6 @@ namespace JIT | |||
| bytes += m_bytes; | |||
| return; | |||
src/backend/cuda/binary.hpp
Outdated
| return "__c"#fn"f"; \ | ||
| } \ | ||
| }; \ | ||
| \ |
There was a problem hiding this comment.
OCD - newline in here, but not after any of the BinOps above
src/backend/cuda/types.cpp
Outdated
| #define INSTANTIATE(T) \ | ||
| template const std::string cuMangledName<T, false>(const char *fn); \ | ||
| template const std::string cuMangledName<T, true>(const char *fn); \ | ||
| template<typename T > const char *shortname(bool caps) { return caps ? "Q" : "q"; } |
There was a problem hiding this comment.
OCD - extra space after typename T (makes it not aligned)
src/backend/cuda/kernel/jit.cuh
Outdated
| double rhs_x = inv_rhs_abs * rhs.x; | ||
| double rhs_y = inv_rhs_abs * rhs.y; | ||
| cdouble out = {lhs.x * rhs_x + lhs.y * rhs_y, | ||
| lhs.y * rhs_x - lhs.x * rhs_y}; |
There was a problem hiding this comment.
OCD - prefix with one extra space to be aligned as L106
src/backend/opencl/kernel/jit.cl
Outdated
| double rhs_x = inv_rhs_abs * rhs.x; | ||
| double rhs_y = inv_rhs_abs * rhs.y; | ||
| double2 out = {lhs.x * rhs_x + lhs.y * rhs_y, | ||
| lhs.y * rhs_x - lhs.x * rhs_y}; |
There was a problem hiding this comment.
OCD - prefix with one space to make it aligned as L75
src/backend/cuda/jit.cpp
Outdated
| // Common CUDA code | ||
| // This part of the code does not change with the kernel. | ||
|
|
||
| static const char *kernelVoid = "extern \"C\" __global__ void\n"; |
There was a problem hiding this comment.
OCD - extra space after equal sign
No description provided.