/* kernel.h -- forked from Theano/libgpuarray */
#ifndef GPUARRAY_KERNEL_H
#define GPUARRAY_KERNEL_H
/** \file kernel.h
* \brief Kernel functions.
*/
#include <gpuarray/buffer.h>
#include <gpuarray/array.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef CONFUSE_EMACS
}
#endif
/**
* Kernel information structure.
*
* Bundles a device kernel reference with a table of argument pointers.
* Set it up with GpuKernel_init() and release it with GpuKernel_clear().
*/
typedef struct _GpuKernel {
/**
* Device kernel reference.
*/
gpukernel *k;
/**
* Argument buffer: a table of untyped pointers.
*
* NOTE(review): presumably one slot per kernel argument (sized from the
* `argcount` passed to GpuKernel_init()) and filled by GpuKernel_setarg()
* -- confirm against the implementation.
*/
void **args;
} GpuKernel;
/**
* Initialize a kernel structure.
*
* `lens` holds the size of each source string. If it is NULL or an
* element has a value of 0, the length will be determined using strlen()
* or equivalent code.
*
* \param k a kernel structure
* \param ctx context in which to build the kernel
* \param count number of source code strings
* \param strs C array of source code strings
* \param lens C array with the size of each string or NULL
* \param name name of the kernel function
* \param argcount number of arguments of the kernel function
* \param types C array of type codes for the kernel arguments
*              (NOTE(review): presumably `argcount` entries, one per
*              argument -- confirm against the implementation)
* \param flags kernel use flags (see \ref ga_usefl)
* \param err_str (if not NULL) location to write GPU-backend provided debug info
*
* If `*err_str` is returned not NULL then it must be free()d by the caller.
*
* \return GA_NO_ERROR if the operation is successful
* \return any other value if an error occurred
*/
GPUARRAY_PUBLIC int GpuKernel_init(GpuKernel *k, gpucontext *ctx,
unsigned int count, const char **strs,
const size_t *lens, const char *name,
unsigned int argcount, const int *types,
int flags, char **err_str);
/**
* Clear and release data associated with a kernel.
*
* This is the release counterpart of GpuKernel_init(). The function
* returns nothing, so no failure is reported to the caller.
*
* \param k the kernel to release
*/
GPUARRAY_PUBLIC void GpuKernel_clear(GpuKernel *k);
/**
* Returns the context in which a kernel was built.
*
* \param k a kernel
*
* \return a context pointer
*/
GPUARRAY_PUBLIC gpucontext *GpuKernel_context(GpuKernel *k);
/**
* Set one argument of a kernel.
*
* \param k the kernel
* \param i index of the argument to set
* \param val pointer to the argument value
*
* NOTE(review): the description above is inferred from the signature.
* Presumably `i` is zero-based and the value is recorded for a later
* launch -- confirm against the implementation.
*
* \return an int status; presumably GA_NO_ERROR on success and another
*         value on error, as for GpuKernel_init() -- TODO confirm
*/
GPUARRAY_PUBLIC int GpuKernel_setarg(GpuKernel *k, unsigned int i, void *val);
/**
* Do a scheduling of local and global size for a kernel.
*
* This function will find an optimal grid and block size for the
* number of elements specified in n when running kernel k. The
* parameters may run a bit more instances than n for efficiency
* reasons, so your kernel must be ready to deal with that.
*
* If either gs or ls is not 0 on entry its value will not be altered
* and will be taken into account when choosing the other value.
*
* \param k the kernel to schedule for
* \param n number of elements to handle
* \param ls local size (in/out)
* \param gs grid size (in/out)
*
* \return an int status; presumably GA_NO_ERROR on success and another
*         value on error, as for GpuKernel_init() -- TODO confirm
*/
GPUARRAY_PUBLIC int GpuKernel_sched(GpuKernel *k, size_t n,
size_t *ls, size_t *gs);
/**
* Launch the execution of a kernel.
*
* \param k the kernel to launch
* \param n dimensionality of the grid/blocks
* \param ls sizes of launch blocks
* \param gs sizes of launch grid
* \param shared amount of dynamic shared memory to allocate
* \param args table of pointers to arguments
*
* NOTE(review): `ls` and `gs` presumably hold `n` entries each, one per
* dimension -- confirm against the implementation.
*
* \return an int status; presumably GA_NO_ERROR on success and another
*         value on error, as for GpuKernel_init() -- TODO confirm
*/
GPUARRAY_PUBLIC int GpuKernel_call(GpuKernel *k, unsigned int n,
const size_t *ls, const size_t *gs,
size_t shared, void **args);
/**
* Retrieve the binary form of a kernel.
*
* \param k the kernel
* \param sz location that receives the size of the binary
* \param obj location that receives a pointer to the binary data
*
* NOTE(review): the description above is inferred from the signature.
* Ownership of `*obj` (whether the caller must free() it) is not stated
* in this header -- confirm against the backend implementation.
*
* \return an int status; presumably GA_NO_ERROR on success and another
*         value on error, as for GpuKernel_init() -- TODO confirm
*/
GPUARRAY_PUBLIC int GpuKernel_binary(const GpuKernel *k, size_t *sz,
void **obj);
/**
* Get a string describing an error code for a kernel.
*
* \param k the kernel the error relates to
* \param err the error code
*
* NOTE(review): the description above is inferred from the signature.
* Presumably `err` is a value returned by one of the GpuKernel_*
* functions and the returned string must not be freed or modified by
* the caller (it is `const`) -- confirm against the implementation.
*
* \return a pointer to a constant character string
*/
GPUARRAY_PUBLIC const char *GpuKernel_error(const GpuKernel *k, int err);
#ifdef __cplusplus
}
#endif
#endif