trt_engine.hpp
// Define TRT entrypoints used in common code
#define DEFINE_TRT_ENTRYPOINTS 1
#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0

#include "buffers.hpp"
#include "logger.h"
#include "util.hpp"

#include "/opt/MagAOX/vendor/TensorRT-10.0.0.6/include/NvInfer.h"
#include "/opt/MagAOX/vendor/TensorRT-10.0.0.6/include/NvOnnxParser.h" // needed for nvonnxparser::createParser (path assumed to match NvInfer.h above)
#include <cuda_runtime_api.h>

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>  // std::shared_ptr / std::unique_ptr
#include <sstream>
#include <vector>  // std::vector used in load()

using namespace nvinfer1;

class TensorrtEngine
{
public:
    TensorrtEngine()
        : mRuntime(nullptr)
        , mEngine(nullptr)
        , buffers(nullptr)
        , context(nullptr)
    {
    }

    bool build(std::string dataDirs, std::string onnxFileName, std::string engineDirs, std::string engineName, bool rebuildEngine);

    bool load(std::string enginePath);

    bool infer(float* inputData);

    bool initializeBuffer();
    float* getOutput();
    inline int32_t getOutputSize() { return outputSize; }

    BufferManager* buffers; // Host/device buffer manager (see buffers.hpp), created in initializeBuffer()

private:

    std::shared_ptr<nvinfer1::IRuntime> mRuntime;          // The TensorRT runtime used to deserialize the engine
    std::shared_ptr<nvinfer1::ICudaEngine> mEngine;        // The TensorRT engine used to run the network
    std::shared_ptr<nvinfer1::IExecutionContext> context;  // Execution context holding per-inference state

    int32_t batch{1};
    int32_t inputC{0};
    int32_t inputH{0};
    int32_t inputW{0};
    int32_t outputSize{0};
    const char* inputName;
    const char* outputName;

    bool constructNetwork(std::unique_ptr<nvinfer1::IBuilder>& builder,
        std::unique_ptr<nvinfer1::INetworkDefinition>& network, std::unique_ptr<nvinfer1::IBuilderConfig>& config,
        std::unique_ptr<nvonnxparser::IParser>& parser, std::string onnxFileName, std::string dataDirs);
};

bool TensorrtEngine::build(std::string dataDirs, std::string onnxFileName, std::string engineDirs, std::string engineName, bool rebuildEngine)
{
    const std::string enginePath = engineDirs + "/" + engineName;
    gLogger = Logger();
    std::cerr << "made a new logger" << std::endl;

    if (!rebuildEngine) {
        if (doesFileExist(enginePath)) {
            std::cout << "Engine found, not regenerating..." << std::endl;
            return load(enginePath);
        } else {
            std::cout << "Engine not found... Let's build a new one." << std::endl;
        }
    }

    auto builder = std::unique_ptr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(gLogger));
    if (!builder)
    {
        std::cerr << "Couldn't createInferBuilder" << std::endl;
        return false;
    }

    auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = std::unique_ptr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
    if (!network)
    {
        std::cerr << "Couldn't createNetworkV2" << std::endl;
        return false;
    }

    auto config = std::unique_ptr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        std::cerr << "Couldn't createBuilderConfig" << std::endl;
        return false;
    }

    auto parser = std::unique_ptr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, gLogger));
    if (!parser)
    {
        std::cerr << "Couldn't createParser" << std::endl;
        return false;
    }

    auto constructed = constructNetwork(builder, network, config, parser, onnxFileName, dataDirs);
    if (!constructed)
    {
        std::cerr << "Couldn't constructNetwork" << std::endl;
        return false;
    }

    // CUDA stream used for profiling by the builder.
    auto profileStream = makeCudaStream();
    if (!profileStream)
    {
        std::cerr << "Couldn't makeCudaStream" << std::endl;
        return false;
    }
    config->setProfileStream(*profileStream);

    // Register a single optimization profile
    nvinfer1::IOptimizationProfile *optProfile = builder->createOptimizationProfile();
    const auto input = network->getInput(0);
    const auto output = network->getOutput(0);

    const auto inputDims = input->getDimensions();
    const auto outputDims = output->getDimensions();

    inputName = input->getName();
    outputName = output->getName();

    inputC = inputDims.d[1];
    inputH = inputDims.d[2];
    inputW = inputDims.d[3];
    outputSize = outputDims.d[1];

    // Specify the optimization profile
    optProfile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4(1, inputC, inputH, inputW));
    optProfile->setDimensions(inputName, nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4(1, inputC, inputH, inputW));
    optProfile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4(1, inputC, inputH, inputW));
    config->addOptimizationProfile(optProfile);

    std::unique_ptr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
    if (!plan)
    {
        std::cerr << "Couldn't buildSerializedNetwork" << std::endl;
        return false;
    }

    mRuntime = std::shared_ptr<nvinfer1::IRuntime>(createInferRuntime(gLogger));
    if (!mRuntime)
    {
        std::cerr << "Couldn't createInferRuntime" << std::endl;
        return false;
    }

    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), InferDeleter());
    if (!mEngine)
    {
        std::cerr << "Couldn't deserializeCudaEngine" << std::endl;
        return false;
    }

    std::ofstream outfile(enginePath, std::ofstream::binary);
    outfile.write(reinterpret_cast<const char *>(plan->data()), plan->size());
    std::cout << "Successfully saved engine to " << enginePath << std::endl;

    return true;
}

bool TensorrtEngine::load(const std::string enginePath)
{
    std::cout << "Loading engine from " << enginePath << std::endl;

    std::ifstream file(enginePath, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        std::cerr << "Error, unable to open engine file from " << enginePath << std::endl;
        return false;
    }
    std::streamsize size = file.tellg();
    file.seekg(0, std::ios::beg);
    if (size <= 0) {
        std::cerr << "Error, invalid engine file size for " << enginePath << std::endl;
        return false;
    }

    std::vector<char> engineBuffer(size);
    if (!file.read(engineBuffer.data(), size)) {
        std::cerr << "Error, unable to read engine file from " << enginePath << std::endl;
        return false;
    }
    std::cout << "Read " << engineBuffer.size() << " bytes of serialized engine." << std::endl;

    mRuntime = std::shared_ptr<nvinfer1::IRuntime>{nvinfer1::createInferRuntime(gLogger)};
    if (!mRuntime) {
        std::cerr << "Error, failed to create inference runtime." << std::endl;
        return false;
    }

    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(mRuntime->deserializeCudaEngine(engineBuffer.data(), engineBuffer.size()));
    if (!mEngine) {
        std::cerr << "Error, failed to deserialize engine from " << enginePath << std::endl;
        return false;
    }

    int numIOTensors = mEngine->getNbIOTensors();
    std::cout << "Number of IO Tensors: " << numIOTensors << std::endl;

    inputName = mEngine->getIOTensorName(0);
    outputName = mEngine->getIOTensorName(1);
    const auto inputDims = mEngine->getTensorShape(inputName);
    const auto outputDims = mEngine->getTensorShape(outputName);

    std::cout << inputName << " " << outputName << std::endl;

    inputC = inputDims.d[1];
    inputH = inputDims.d[2];
    inputW = inputDims.d[3];
    outputSize = outputDims.d[1];

    return true;
}

bool TensorrtEngine::constructNetwork(std::unique_ptr<nvinfer1::IBuilder>& builder,
    std::unique_ptr<nvinfer1::INetworkDefinition>& network, std::unique_ptr<nvinfer1::IBuilderConfig>& config,
    std::unique_ptr<nvonnxparser::IParser>& parser, std::string onnxFileName, std::string dataDirs)
{
    const std::string onnxFilePath = dataDirs + "/" + onnxFileName;
    std::cout << "ONNX file: " << onnxFilePath << std::endl;

    // Parse the ONNX file into the TensorRT network definition (second argument is the parser verbosity level)
    auto parsed = parser->parseFromFile(onnxFilePath.c_str(), 0);
    if (!parsed)
    {
        return false;
    }

    config->setFlag(BuilderFlag::kFP16);

    // enableDLA(builder.get(), config.get(), -1);

    return true;
}

bool TensorrtEngine::initializeBuffer()
{
    // Create RAII buffer manager object
    //buffers = std::shared_ptr<BufferManager>(mEngine);
    // buffers = std::make_shared<BufferManager>(mEngine);
    buffers = new BufferManager(mEngine); // allocate host/device buffers for all IO tensors of the engine

    context = std::shared_ptr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return false;
    }

    // Bind each IO tensor of the engine to its device buffer
    for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
    {
        auto const name = mEngine->getIOTensorName(i);
        context->setTensorAddress(name, buffers->getDeviceBuffer(name));
    }
    return true;
}

bool TensorrtEngine::infer(float* inputData)
{
    // Copy the input into the host staging buffer
    float* hostDataBuffer = static_cast<float*>(buffers->getHostBuffer(inputName));
    for (int i = 0; i < inputC * inputH * inputW; i++)
    {
        hostDataBuffer[i] = inputData[i];
    }

    // Debug check: the two sums should agree if the copy succeeded
    float test = 0;
    for (int i = 0; i < (inputC * inputH * inputW); i++){
        test += inputData[i] / 10000.0;
    }

    float test2 = 0;
    for (int i = 0; i < (inputC * inputH * inputW); i++){
        test2 += hostDataBuffer[i] / 10000.0;
    }
    std::cout << "test: " << test << " " << "test2: " << test2 << std::endl;

    // Memcpy from host input buffers to device input buffers
    buffers->copyInputToDevice();

    // Propagate through network
    bool status = context->executeV2(buffers->getDeviceBindings().data());
    if (!status)
    {
        return false;
    }

    // Memcpy from device output buffers to host output buffers
    buffers->copyOutputToHost();

    return true;
}

float* TensorrtEngine::getOutput()
{
    float* output = static_cast<float*>(buffers->getHostBuffer(outputName));
    return output;
}
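
A minimal usage sketch (not part of trt_engine.hpp): the intended call sequence is build(), which loads a cached engine or builds one from the ONNX model, then initializeBuffer() to allocate the BufferManager and execution context, then infer() with a flat host-side float array of size inputC * inputH * inputW, and finally getOutput()/getOutputSize() to read back the result. The directory names, file names, and input size below are placeholders, not values from this repository.

// Hypothetical driver illustrating the call order; paths and sizes are placeholders.
#include "trt_engine.hpp"
#include <iostream>
#include <vector>

int main()
{
    TensorrtEngine engine;

    // Build the engine from the ONNX model, or load it if a cached plan already exists.
    if (!engine.build("/path/to/models", "model.onnx", "/path/to/engines", "model.engine", false))
        return 1;

    // Allocate host/device buffers and create the execution context.
    if (!engine.initializeBuffer())
        return 1;

    // Input is expected as a flat float array of size inputC * inputH * inputW (placeholder size here).
    std::vector<float> input(1 * 64 * 64, 0.0f);
    if (!engine.infer(input.data()))
        return 1;

    // Read back the network output from the host output buffer.
    float* out = engine.getOutput();
    for (int32_t i = 0; i < engine.getOutputSize(); i++)
        std::cout << out[i] << " ";
    std::cout << std::endl;

    return 0;
}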