OpenVDB 10.0.1
ComputePrimitives.h
// Copyright Contributors to the OpenVDB Project
// SPDX-License-Identifier: MPL-2.0

/// @file ComputePrimitives.h
/// @brief A collection of parallel compute primitives

#pragma once

#if defined(NANOVDB_USE_CUDA)
#include <cuda_runtime_api.h>
#endif

#if defined(NANOVDB_USE_TBB)
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif

#include <cstdint>  // uint8_t
#include <cstdlib>  // exit
#include <cstring>  // std::memset, std::memcpy
#include <iostream> // std::cerr (used by the CUDA error checks below)
#include <utility>
#include <tuple>

// forward compatibility for C++14 Standard Library
namespace cxx14 {
template<std::size_t...>
struct index_sequence
{
};

template<std::size_t N, std::size_t... Is>
struct make_index_sequence : make_index_sequence<N - 1, N - 1, Is...>
{
};

template<std::size_t... Is>
struct make_index_sequence<0, Is...> : index_sequence<Is...>
{
};
} // namespace cxx14

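// The recursion above builds the index pack one step at a time: for example,
// make_index_sequence<3> derives from make_index_sequence<2, 2>, then
// make_index_sequence<1, 1, 2>, then make_index_sequence<0, 0, 1, 2>, which
// matches the terminating specialization and yields index_sequence<0, 1, 2>.
// ApplyFunc::call below relies on this to unpack its argument tuple.
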
#if defined(__CUDACC__)

static inline bool checkCUDA(cudaError_t result, const char* file, const int line)
{
    if (result != cudaSuccess) {
        std::cerr << "CUDA Runtime API error " << result << " in file " << file << ", line " << line << " : " << cudaGetErrorString(result) << ".\n";
        return false;
    }
    return true;
}

#define NANOVDB_CUDA_SAFE_CALL(x) checkCUDA(x, __FILE__, __LINE__)

static inline void checkErrorCUDA(cudaError_t result, const char* file, const int line)
{
    if (result != cudaSuccess) {
        std::cerr << "CUDA Runtime API error " << result << " in file " << file << ", line " << line << " : " << cudaGetErrorString(result) << ".\n";
        exit(1);
    }
}

#define NANOVDB_CUDA_CHECK_ERROR(result, file, line) checkErrorCUDA(result, file, line)

#endif

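// Usage sketch (illustrative only; "devicePtr" and "size" are hypothetical):
// NANOVDB_CUDA_SAFE_CALL wraps a CUDA Runtime call and reports any failure
// together with the call site; checkCUDA returns false so the caller can
// recover, while NANOVDB_CUDA_CHECK_ERROR terminates the process instead.
//
//   float* devicePtr = nullptr;
//   NANOVDB_CUDA_SAFE_CALL(cudaMalloc((void**)&devicePtr, size));
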
template<typename Fn, typename... Args>
class ApplyFunc
{
public:
    ApplyFunc(int count, int blockSize, const Fn& fn, Args... args)
        : mCount(count)
        , mBlockSize(blockSize)
        , mFunc(fn)
        , mArgs(args...)
    {
    }

    template<std::size_t... Is>
    void call(int start, int end, cxx14::index_sequence<Is...>) const
    {
        mFunc(start, end, std::get<Is>(mArgs)...);
    }

    void operator()(int i) const
    {
        int start = i * mBlockSize;
        int end = i * mBlockSize + mBlockSize;
        if (end > mCount)
            end = mCount;
        call(start, end, cxx14::make_index_sequence<sizeof...(Args)>());
    }

#if defined(NANOVDB_USE_TBB)
    void operator()(const tbb::blocked_range<int>& r) const
    {
        int start = r.begin();
        int end = r.end();
        if (end > mCount)
            end = mCount;
        call(start, end, cxx14::make_index_sequence<sizeof...(Args)>());
    }
#endif

private:
    int                 mCount;
    int                 mBlockSize;
    Fn                  mFunc;
    std::tuple<Args...> mArgs;
};
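
// Usage sketch (hypothetical, for illustration only; "data" stands for a host
// pointer with at least count elements): ApplyFunc adapts a range-based functor
// to both the plain index-based call path and the TBB blocked_range path above.
//
//   auto op = [](int start, int end, float* p) {
//       for (int i = start; i < end; ++i) p[i] *= 2.0f;
//   };
//   ApplyFunc<decltype(op), float*> apply(/*count=*/1000, /*blockSize=*/128, op, data);
//   apply(0); // processes elements [0, 128)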

#if defined(__CUDACC__)

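// Each thread of parallelForKernel processes up to WorkPerThread items; the j-th
// item handled by a thread is offset by j * blockDim.x * gridDim.x (the total
// number of threads in the grid), so adjacent threads access adjacent items.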
template<int WorkPerThread, typename FnT, typename... Args>
__global__ void parallelForKernel(int numItems, FnT f, Args... args)
{
    for (int j = 0; j < WorkPerThread; ++j)
    {
        int i = threadIdx.x + blockIdx.x * blockDim.x + j * blockDim.x * gridDim.x;
        if (i < numItems)
            f(i, i + 1, args...);
    }
}

#endif

inline void computeSync(bool useCuda, const char* file, int line)
{
#if defined(__CUDACC__)
    if (useCuda) {
        NANOVDB_CUDA_CHECK_ERROR(cudaDeviceSynchronize(), file, line);
    }
#endif
}

inline void computeFill(bool useCuda, void* data, uint8_t value, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        cudaMemset(data, value, size);
#endif
    } else {
        std::memset(data, value, size);
    }
}
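
// Usage sketch (hypothetical names, for illustration only): zero-initialize a
// buffer on whichever backend owns it; "bufferPtr" must be device memory when
// useCuda is true and host memory otherwise.
//
//   computeFill(useCuda, bufferPtr, 0, numItems * sizeof(float));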

template<typename FunctorT, typename... Args>
inline void computeForEach(bool useCuda, int numItems, int blockSize, const char* file, int line, const FunctorT& op, Args... args)
{
    if (numItems == 0)
        return;

    if (useCuda) {
#if defined(__CUDACC__)
        static const int WorkPerThread = 1;
        int blockCount = ((numItems / WorkPerThread) + (blockSize - 1)) / blockSize;
        parallelForKernel<WorkPerThread, FunctorT, Args...><<<blockCount, blockSize, 0, 0>>>(numItems, op, args...);
        NANOVDB_CUDA_CHECK_ERROR(cudaGetLastError(), file, line);
#endif
    } else {
#if defined(NANOVDB_USE_TBB)
        tbb::blocked_range<int> range(0, numItems, blockSize);
        tbb::parallel_for(range, ApplyFunc<FunctorT, Args...>(numItems, blockSize, op, args...));
#else
        for (int i = 0; i < numItems; ++i)
            op(i, i + 1, args...);
#endif
    }
}
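
// Usage sketch (hypothetical names, for illustration only): run the same range
// functor on either backend. "ptr" must point to device memory when useCuda is
// true and to host memory otherwise, and under CUDA the functor must be callable
// on the device as well as on the host (e.g. a __host__ __device__ lambda; NanoVDB
// code typically spells this with its __hostdev__ macro, which this header does
// not define).
//
//   computeForEach(useCuda, numItems, 512, __FILE__, __LINE__,
//       [=] __host__ __device__ (int start, int end, float* p) {
//           for (int i = start; i < end; ++i) p[i] = float(i);
//       }, ptr);
//   computeSync(useCuda, __FILE__, __LINE__);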

inline void computeDownload(bool useCuda, void* dst, const void* src, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
#endif
    } else {
        std::memcpy(dst, src, size);
    }
}

inline void computeCopy(bool useCuda, void* dst, const void* src, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice);
#endif
    } else {
        std::memcpy(dst, src, size);
    }
}
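
// Usage sketch (hypothetical names, for illustration only; "hostBuffer" being,
// say, a std::vector<float> of numItems elements): after a device-side
// computeForEach, results are typically synchronized and then copied back:
//
//   computeSync(useCuda, __FILE__, __LINE__);
//   computeDownload(useCuda, hostBuffer.data(), ptr, numItems * sizeof(float));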