API
 
Loading...
Searching...
No Matches
buffers.hpp
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#ifndef TENSORRT_BUFFERS_H
18#define TENSORRT_BUFFERS_H
19
#include "/opt/MagAOX/vendor/TensorRT-10.0.0.6/include/NvInfer.h"
//#include "common.h"

#include <cuda_runtime_api.h>

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <numeric>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
33
34#undef CHECK
35#define CHECK(status) \
36 do \
37 { \
38 auto ret = (status); \
39 if (ret != 0) \
40 { \
41 std::cerr << "Cuda failure: " << ret << std::endl; \
42 exit(EXIT_FAILURE); \
43 } \
44 } while (0)
45
46//! Return m rounded up to nearest multiple of n
47template <typename A, typename B>
48inline A divUp(A x, B n)
49{
50 return (x + n - 1) / n;
51}
52
53template <typename T1, typename T2>
54inline T1 roundUp(T1 m, T2 n)
55{
56 static_assert(std::is_integral<T1>::value && std::is_integral<T2>::value, "arguments must be integers");
57 static_assert(std::is_signed<T1>::value == std::is_signed<T2>::value, "mixed signedness not allowed");
58 static_assert(sizeof(T1) >= sizeof(T2), "first type must be as least as wide as second type");
59 return ((m + n - 1) / n) * n;
60}
61
62inline int64_t volume(nvinfer1::Dims const& d)
63{
64 return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, std::multiplies<int64_t>{});
65}
66
67
68inline uint32_t getElementSize(nvinfer1::DataType t) noexcept
69{
70 switch (t)
71 {
72 case nvinfer1::DataType::kINT64: return 8;
73 case nvinfer1::DataType::kINT32:
74 case nvinfer1::DataType::kFLOAT: return 4;
75 case nvinfer1::DataType::kBF16:
76 case nvinfer1::DataType::kHALF: return 2;
77 case nvinfer1::DataType::kBOOL:
78 case nvinfer1::DataType::kUINT8:
79 case nvinfer1::DataType::kINT8:
80 case nvinfer1::DataType::kFP8: return 1;
81 case nvinfer1::DataType::kINT4: return 0;
82 }
83 return 0;
84}
85
86
87//!
88//! \brief The GenericBuffer class is a templated class for buffers.
89//!
90//! \details This templated RAII (Resource Acquisition Is Initialization) class handles the allocation,
91//! deallocation, querying of buffers on both the device and the host.
92//! It can handle data of arbitrary types because it stores byte buffers.
93//! The template parameters AllocFunc and FreeFunc are used for the
94//! allocation and deallocation of the buffer.
95//! AllocFunc must be a functor that takes in (void** ptr, size_t size)
96//! and returns bool. ptr is a pointer to where the allocated buffer address should be stored.
97//! size is the amount of memory in bytes to allocate.
98//! The boolean indicates whether or not the memory allocation was successful.
99//! FreeFunc must be a functor that takes in (void* ptr) and returns void.
100//! ptr is the allocated buffer address. It must work with nullptr input.
101//!
102template <typename AllocFunc, typename FreeFunc>
104{
105public:
106 //!
107 //! \brief Construct an empty buffer.
108 //!
109 GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
110 : mSize(0)
111 , mCapacity(0)
112 , mType(type)
113 , mBuffer(nullptr)
114 {
115 }
116
117 //!
118 //! \brief Construct a buffer with the specified allocation size in bytes.
119 //!
120 GenericBuffer(size_t size, nvinfer1::DataType type)
121 : mSize(size)
122 , mCapacity(size)
123 , mType(type)
124 {
125 if (!allocFn(&mBuffer, this->nbBytes()))
126 {
127 throw std::bad_alloc();
128 }
129 }
130
132 : mSize(buf.mSize)
133 , mCapacity(buf.mCapacity)
134 , mType(buf.mType)
135 , mBuffer(buf.mBuffer)
136 {
137 buf.mSize = 0;
138 buf.mCapacity = 0;
139 buf.mType = nvinfer1::DataType::kFLOAT;
140 buf.mBuffer = nullptr;
141 }
142
144 {
145 if (this != &buf)
146 {
148 mSize = buf.mSize;
149 mCapacity = buf.mCapacity;
150 mType = buf.mType;
151 mBuffer = buf.mBuffer;
152 // Reset buf.
153 buf.mSize = 0;
154 buf.mCapacity = 0;
155 buf.mBuffer = nullptr;
156 }
157 return *this;
158 }
159
160 //!
161 //! \brief Returns pointer to underlying array.
162 //!
163 void* data()
164 {
165 return mBuffer;
166 }
167
168 //!
169 //! \brief Returns pointer to underlying array.
170 //!
171 const void* data() const
172 {
173 return mBuffer;
174 }
175
176 //!
177 //! \brief Returns the size (in number of elements) of the buffer.
178 //!
179 size_t size() const
180 {
181 return mSize;
182 }
183
184 //!
185 //! \brief Returns the size (in bytes) of the buffer.
186 //!
187 size_t nbBytes() const
188 {
189 return this->size() * getElementSize(mType);
190 }
191
192 //!
193 //! \brief Resizes the buffer. This is a no-op if the new size is smaller than or equal to the current capacity.
194 //!
195 void resize(size_t newSize)
196 {
197 mSize = newSize;
198 if (mCapacity < newSize)
199 {
201 if (!allocFn(&mBuffer, this->nbBytes()))
202 {
203 throw std::bad_alloc{};
204 }
205 mCapacity = newSize;
206 }
207 }
208
209 //!
210 //! \brief Overload of resize that accepts Dims
211 //!
212 void resize(const nvinfer1::Dims& dims)
213 {
214 return this->resize(volume(dims));
215 }
216
218 {
220 }
221
222private:
223 size_t mSize{0}, mCapacity{0};
224 nvinfer1::DataType mType;
225 void* mBuffer;
226 AllocFunc allocFn;
227 FreeFunc freeFn;
228};
229
231{
232public:
233 bool operator()(void** ptr, size_t size) const
234 {
235 return cudaMalloc(ptr, size) == cudaSuccess;
236 }
237};
238
240{
241public:
242 void operator()(void* ptr) const
243 {
244 cudaFree(ptr);
245 }
246};
247
249{
250public:
251 bool operator()(void** ptr, size_t size) const
252 {
253 *ptr = malloc(size);
254 return *ptr != nullptr;
255 }
256};
257
259{
260public:
261 void operator()(void* ptr) const
262 {
263 free(ptr);
264 }
265};
266
269
270//!
271//! \brief The ManagedBuffer class groups together a pair of corresponding device and host buffers.
272//!
279
280//!
281//! \brief The BufferManager class handles host and device buffer allocation and deallocation.
282//!
283//! \details This RAII class handles host and device buffer allocation and deallocation,
284//! memcpy between host and device buffers to aid with inference,
285//! and debugging dumps to validate inference. The BufferManager class is meant to be
286//! used to simplify buffer management and any interactions between buffers and the engine.
287//!
289{
290public:
291 static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
292
293 //!
294 //! \brief Create a BufferManager for handling buffer interactions with engine, when the I/O tensor volumes
295 //! are provided
296 //!
298 std::shared_ptr<nvinfer1::ICudaEngine> engine, std::vector<int64_t> const& volumes, int32_t batchSize = 0)
299 : mEngine(engine)
300 , mBatchSize(batchSize)
301 {
302 // Create host and device buffers
303 for (int32_t i = 0; i < mEngine->getNbIOTensors(); i++)
304 {
305 auto const name = engine->getIOTensorName(i);
306 mNames[name] = i;
307
308 nvinfer1::DataType type = mEngine->getTensorDataType(name);
309
310 std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
311 manBuf->deviceBuffer = DeviceBuffer(volumes[i], type);
312 manBuf->hostBuffer = HostBuffer(volumes[i], type);
313 void* deviceBuffer = manBuf->deviceBuffer.data();
314 mDeviceBindings.emplace_back(deviceBuffer);
315 mManagedBuffers.emplace_back(std::move(manBuf));
316 }
317 }
318
319 //!
320 //! \brief Create a BufferManager for handling buffer interactions with engine.
321 //!
322 BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine, int32_t const batchSize = 0,
323 nvinfer1::IExecutionContext const* context = nullptr)
324 : mEngine(engine)
325 , mBatchSize(batchSize)
326 {
327 // Create host and device buffers
328 for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
329 {
330 auto const name = engine->getIOTensorName(i);
331 mNames[name] = i;
332
333 auto dims = context ? context->getTensorShape(name) : mEngine->getTensorShape(name);
334 size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
335 nvinfer1::DataType type = mEngine->getTensorDataType(name);
336 int32_t vecDim = mEngine->getTensorVectorizedDim(name);
337 if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
338 {
339 int32_t scalarsPerVec = mEngine->getTensorComponentsPerElement(name);
340 dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
341 vol *= scalarsPerVec;
342 }
343 vol *= volume(dims);
344 std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
345 manBuf->deviceBuffer = DeviceBuffer(vol, type);
346 manBuf->hostBuffer = HostBuffer(vol, type);
347 void* deviceBuffer = manBuf->deviceBuffer.data();
348 mDeviceBindings.emplace_back(deviceBuffer);
349 mManagedBuffers.emplace_back(std::move(manBuf));
350 }
351 }
352
353 //!
354 //! \brief Returns a vector of device buffers that you can use directly as
355 //! bindings for the execute and enqueue methods of IExecutionContext.
356 //!
357 std::vector<void*>& getDeviceBindings()
358 {
359 return mDeviceBindings;
360 }
361
362 //!
363 //! \brief Returns a vector of device buffers.
364 //!
365 std::vector<void*> const& getDeviceBindings() const
366 {
367 return mDeviceBindings;
368 }
369
370 //!
371 //! \brief Returns the device buffer corresponding to tensorName.
372 //! Returns nullptr if no such tensor can be found.
373 //!
374 void* getDeviceBuffer(std::string const& tensorName) const
375 {
376 return getBuffer(false, tensorName);
377 }
378
379 //!
380 //! \brief Returns the host buffer corresponding to tensorName.
381 //! Returns nullptr if no such tensor can be found.
382 //!
383 void* getHostBuffer(std::string const& tensorName) const
384 {
385 return getBuffer(true, tensorName);
386 }
387
388 //!
389 //! \brief Returns the size of the host and device buffers that correspond to tensorName.
390 //! Returns kINVALID_SIZE_VALUE if no such tensor can be found.
391 //!
392 size_t size(std::string const& tensorName) const
393 {
394 auto record = mNames.find(tensorName);
395 if (record == mNames.end())
396 return kINVALID_SIZE_VALUE;
397 return mManagedBuffers[record->second]->hostBuffer.nbBytes();
398 }
399
400 //!
401 //! \brief Templated print function that dumps buffers of arbitrary type to std::ostream.
402 //! rowCount parameter controls how many elements are on each line.
403 //! A rowCount of 1 means that there is only 1 element on each line.
404 //!
405 template <typename T>
406 void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount)
407 {
408 assert(rowCount != 0);
409 assert(bufSize % sizeof(T) == 0);
410 T* typedBuf = static_cast<T*>(buf);
411 size_t numItems = bufSize / sizeof(T);
412 for (int32_t i = 0; i < static_cast<int>(numItems); i++)
413 {
414 // Handle rowCount == 1 case
415 if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
416 os << typedBuf[i] << std::endl;
417 else if (rowCount == 1)
418 os << typedBuf[i];
419 // Handle rowCount > 1 case
420 else if (i % rowCount == 0)
421 os << typedBuf[i];
422 else if (i % rowCount == rowCount - 1)
423 os << " " << typedBuf[i] << std::endl;
424 else
425 os << " " << typedBuf[i];
426 }
427 }
428
429 //!
430 //! \brief Copy the contents of input host buffers to input device buffers synchronously.
431 //!
433 {
434 memcpyBuffers(true, false, false);
435 }
436
437 //!
438 //! \brief Copy the contents of output device buffers to output host buffers synchronously.
439 //!
441 {
442 memcpyBuffers(false, true, false);
443 }
444
445 //!
446 //! \brief Copy the contents of input host buffers to input device buffers asynchronously.
447 //!
448 void copyInputToDeviceAsync(cudaStream_t const& stream = 0)
449 {
450 memcpyBuffers(true, false, true, stream);
451 }
452
453 //!
454 //! \brief Copy the contents of output device buffers to output host buffers asynchronously.
455 //!
456 void copyOutputToHostAsync(cudaStream_t const& stream = 0)
457 {
458 memcpyBuffers(false, true, true, stream);
459 }
460
461 ~BufferManager() = default;
462
463private:
464 void* getBuffer(bool const isHost, std::string const& tensorName) const
465 {
466 auto record = mNames.find(tensorName);
467 if (record == mNames.end())
468 return nullptr;
469 return (isHost ? mManagedBuffers[record->second]->hostBuffer.data()
470 : mManagedBuffers[record->second]->deviceBuffer.data());
471 }
472
473 bool tenosrIsInput(const std::string& tensorName) const
474 {
475 return mEngine->getTensorIOMode(tensorName.c_str()) == nvinfer1::TensorIOMode::kINPUT;
476 }
477
478 void memcpyBuffers(bool const copyInput, bool const deviceToHost, bool const async, cudaStream_t const& stream = 0)
479 {
480 for (auto const& n : mNames)
481 {
482 void* dstPtr = deviceToHost ? mManagedBuffers[n.second]->hostBuffer.data()
483 : mManagedBuffers[n.second]->deviceBuffer.data();
484 void const* srcPtr = deviceToHost ? mManagedBuffers[n.second]->deviceBuffer.data()
485 : mManagedBuffers[n.second]->hostBuffer.data();
486 size_t const byteSize = mManagedBuffers[n.second]->hostBuffer.nbBytes();
487 const cudaMemcpyKind memcpyType = deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
488 if ((copyInput && tenosrIsInput(n.first)) || (!copyInput && !tenosrIsInput(n.first)))
489 {
490 if (async)
491 CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
492 else
493 CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
494 }
495 }
496 }
497
498 std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The pointer to the engine
499 int mBatchSize; //!< The batch size for legacy networks, 0 otherwise.
500 std::vector<std::unique_ptr<ManagedBuffer>> mManagedBuffers; //!< The vector of pointers to managed buffers
501 std::vector<void*> mDeviceBindings; //!< The vector of device buffers needed for engine execution
502 std::unordered_map<std::string, int32_t> mNames; //!< The map of tensor name and index pairs
503};
504
505
506#endif // TENSORRT_BUFFERS_H
uint32_t getElementSize(nvinfer1::DataType t) noexcept
Definition buffers.hpp:68
GenericBuffer< HostAllocator, HostFree > HostBuffer
Definition buffers.hpp:268
A divUp(A x, B n)
Return m rounded up to nearest multiple of n.
Definition buffers.hpp:48
HostBuffer hostBuffer
Definition buffers.hpp:277
#define CHECK(status)
Definition buffers.hpp:35
GenericBuffer< DeviceAllocator, DeviceFree > DeviceBuffer
Definition buffers.hpp:267
int64_t volume(nvinfer1::Dims const &d)
Definition buffers.hpp:62
T1 roundUp(T1 m, T2 n)
Definition buffers.hpp:54
DeviceBuffer deviceBuffer
Definition buffers.hpp:276
The ManagedBuffer class groups together a pair of corresponding device and host buffers.
Definition buffers.hpp:274
The BufferManager class handles host and device buffer allocation and deallocation.
Definition buffers.hpp:289
std::vector< void * > const & getDeviceBindings() const
Returns a vector of device buffers.
Definition buffers.hpp:365
std::vector< std::unique_ptr< ManagedBuffer > > mManagedBuffers
The vector of pointers to managed buffers.
Definition buffers.hpp:500
BufferManager(std::shared_ptr< nvinfer1::ICudaEngine > engine, std::vector< int64_t > const &volumes, int32_t batchSize=0)
Create a BufferManager for handling buffer interactions with engine, when the I/O tensor volumes are ...
Definition buffers.hpp:297
std::shared_ptr< nvinfer1::ICudaEngine > mEngine
The pointer to the engine.
Definition buffers.hpp:498
void memcpyBuffers(bool const copyInput, bool const deviceToHost, bool const async, cudaStream_t const &stream=0)
Definition buffers.hpp:478
size_t size(std::string const &tensorName) const
Returns the size of the host and device buffers that correspond to tensorName. Returns kINVALID_SIZE_...
Definition buffers.hpp:392
void copyOutputToHostAsync(cudaStream_t const &stream=0)
Copy the contents of output device buffers to output host buffers asynchronously.
Definition buffers.hpp:456
void print(std::ostream &os, void *buf, size_t bufSize, size_t rowCount)
Templated print function that dumps buffers of arbitrary type to std::ostream. rowCount parameter con...
Definition buffers.hpp:406
void * getBuffer(bool const isHost, std::string const &tensorName) const
Definition buffers.hpp:464
void * getHostBuffer(std::string const &tensorName) const
Returns the host buffer corresponding to tensorName. Returns nullptr if no such tensor can be found.
Definition buffers.hpp:383
~BufferManager()=default
static const size_t kINVALID_SIZE_VALUE
Definition buffers.hpp:291
void copyOutputToHost()
Copy the contents of output device buffers to output host buffers synchronously.
Definition buffers.hpp:440
std::unordered_map< std::string, int32_t > mNames
The map of tensor name and index pairs.
Definition buffers.hpp:502
std::vector< void * > mDeviceBindings
The vector of device buffers needed for engine execution.
Definition buffers.hpp:501
void copyInputToDevice()
Copy the contents of input host buffers to input device buffers synchronously.
Definition buffers.hpp:432
std::vector< void * > & getDeviceBindings()
Returns a vector of device buffers that you can use directly as bindings for the execute and enqueue ...
Definition buffers.hpp:357
bool tenosrIsInput(const std::string &tensorName) const
Definition buffers.hpp:473
BufferManager(std::shared_ptr< nvinfer1::ICudaEngine > engine, int32_t const batchSize=0, nvinfer1::IExecutionContext const *context=nullptr)
Create a BufferManager for handling buffer interactions with engine.
Definition buffers.hpp:322
int mBatchSize
The batch size for legacy networks, 0 otherwise.
Definition buffers.hpp:499
void copyInputToDeviceAsync(cudaStream_t const &stream=0)
Copy the contents of input host buffers to input device buffers asynchronously.
Definition buffers.hpp:448
void * getDeviceBuffer(std::string const &tensorName) const
Returns the device buffer corresponding to tensorName. Returns nullptr if no such tensor can be found...
Definition buffers.hpp:374
bool operator()(void **ptr, size_t size) const
Definition buffers.hpp:233
void operator()(void *ptr) const
Definition buffers.hpp:242
The GenericBuffer class is a templated class for buffers.
Definition buffers.hpp:104
void * mBuffer
Definition buffers.hpp:225
size_t mCapacity
Definition buffers.hpp:223
FreeFunc freeFn
Definition buffers.hpp:227
GenericBuffer(GenericBuffer &&buf)
Definition buffers.hpp:131
GenericBuffer(size_t size, nvinfer1::DataType type)
Construct a buffer with the specified allocation size in bytes.
Definition buffers.hpp:120
const void * data() const
Returns pointer to underlying array.
Definition buffers.hpp:171
size_t nbBytes() const
Returns the size (in bytes) of the buffer.
Definition buffers.hpp:187
void * data()
Returns pointer to underlying array.
Definition buffers.hpp:163
AllocFunc allocFn
Definition buffers.hpp:226
nvinfer1::DataType mType
Definition buffers.hpp:224
void resize(size_t newSize)
Resizes the buffer. This is a no-op if the new size is smaller than or equal to the current capacity.
Definition buffers.hpp:195
GenericBuffer(nvinfer1::DataType type=nvinfer1::DataType::kFLOAT)
Construct an empty buffer.
Definition buffers.hpp:109
size_t size() const
Returns the size (in number of elements) of the buffer.
Definition buffers.hpp:179
void resize(const nvinfer1::Dims &dims)
Overload of resize that accepts Dims.
Definition buffers.hpp:212
GenericBuffer & operator=(GenericBuffer &&buf)
Definition buffers.hpp:143
bool operator()(void **ptr, size_t size) const
Definition buffers.hpp:251
void operator()(void *ptr) const
Definition buffers.hpp:261