API
 
trt_engine_syh.hpp
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//!
//! trt_engine_syh.hpp
//! This file contains the implementation of the ONNX MNIST sample. It creates the network using
//! the MNIST ONNX model.
//! It can be run with the following command line:
//! Command: ./sample_onnx_mnist [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
//! [--useDLACore=<int>]
//!

// Define TRT entrypoints used in common code
#define DEFINE_TRT_ENTRYPOINTS 1
#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0

#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "logger.h"
#include "parserOnnxConfig.h"

#include "NvInfer.h"
#include <cuda_runtime_api.h>

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>

using namespace nvinfer1;
using samplesCommon::SampleUniquePtr;

const std::string gSampleName = "TensorRT.deep_learning_AO";

//! \brief The TensorRTEngine class implements a generic TensorRT model
//!
//! \details It creates the network using an ONNX model
//!
class TensorRTEngine
{
public:
    TensorRTEngine(const samplesCommon::OnnxSampleParams& params)
        : mParams(params), mRuntime(nullptr), mEngine(nullptr) {}

    //!
    //! \brief Builds the network engine
    //!
    bool build();

    //!
    //! \brief Runs the TensorRT inference engine for this sample
    //!
    bool infer();

private:
    samplesCommon::OnnxSampleParams mParams; //!< The parameters for the sample.

    nvinfer1::Dims mInputDims;  //!< The dimensions of the input to the network.
    nvinfer1::Dims mOutputDims; //!< The dimensions of the output to the network.
    int mNumber{0};             //!< The number to classify.

    std::shared_ptr<nvinfer1::IRuntime> mRuntime;   //!< The TensorRT runtime used to deserialize the engine.
    std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The TensorRT engine used to run the network.

    //!
    //! \brief Parses an ONNX model for MNIST and creates a TensorRT network
    //!
    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
        SampleUniquePtr<nvonnxparser::IParser>& parser);

    //!
    //! \brief Reads the input and stores the result in a managed buffer
    //!
    bool processInput(float* fileData, const samplesCommon::BufferManager& buffers);
};
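
// A minimal usage sketch (not part of the original sample): construct the wrapper with
// samplesCommon::OnnxSampleParams, call build() once to parse the ONNX model and create the
// engine, then call infer() for each inference request. main() at the bottom of this file
// follows exactly this order:
//
//     TensorRTEngine engine(initializeSampleParams(args));
//     if (!engine.build() || !engine.infer()) { /* report failure */ }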

//!
//! \brief Creates the network, configures the builder and creates the network engine
//!
//! \details This function creates the ONNX MNIST network by parsing the ONNX model and builds
//! the engine that will be used to run MNIST (mEngine)
//!
//! \return true if the engine was created successfully and false otherwise
//!
bool TensorRTEngine::build()
{
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
    if (!builder)
    {
        return false;
    }

    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
    if (!network)
    {
        return false;
    }

    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        return false;
    }

    auto parser = SampleUniquePtr<nvonnxparser::IParser>(
        nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
    if (!parser)
    {
        return false;
    }

    auto constructed = constructNetwork(builder, network, config, parser);
    if (!constructed)
    {
        return false;
    }

    // CUDA stream used for profiling by the builder.
    auto profileStream = samplesCommon::makeCudaStream();
    if (!profileStream)
    {
        return false;
    }
    config->setProfileStream(*profileStream);

    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
    if (!plan)
    {
        return false;
    }

    mRuntime = std::shared_ptr<nvinfer1::IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
    if (!mRuntime)
    {
        return false;
    }

    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
    if (!mEngine)
    {
        return false;
    }

    ASSERT(network->getNbInputs() == 1);
    mInputDims = network->getInput(0)->getDimensions();
    ASSERT(mInputDims.nbDims == 4);

    ASSERT(network->getNbOutputs() == 1);
    mOutputDims = network->getOutput(0)->getDimensions();
    ASSERT(mOutputDims.nbDims == 2);

    return true;
}

//!
//! \brief Uses an ONNX parser to create the ONNX MNIST network and marks the
//! output layers
//!
//! \param builder Pointer to the engine builder
//! \param network Pointer to the network that will be populated with the ONNX MNIST network
//! \param config Pointer to the builder configuration
//! \param parser Pointer to the ONNX parser
//!
bool TensorRTEngine::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
    SampleUniquePtr<nvonnxparser::IParser>& parser)
{
    auto parsed = parser->parseFromFile(locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(),
        static_cast<int>(sample::gLogger.getReportableSeverity()));
    if (!parsed)
    {
        return false;
    }

    if (mParams.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }
    if (mParams.bf16)
    {
        config->setFlag(BuilderFlag::kBF16);
    }
    if (mParams.int8)
    {
        config->setFlag(BuilderFlag::kINT8);
        samplesCommon::setAllDynamicRanges(network.get(), 127.0F, 127.0F);
    }

    samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);

    return true;
}

//!
//! \brief Runs the TensorRT inference engine for this sample
//!
//! \details This function is the main execution function of the sample. It allocates the buffer,
//! sets inputs and executes the engine.
//!
bool TensorRTEngine::infer()
{
    // Create RAII buffer manager object
    samplesCommon::BufferManager buffers(mEngine);

    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return false;
    }

    for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
    {
        auto const name = mEngine->getIOTensorName(i);
        context->setTensorAddress(name, buffers.getDeviceBuffer(name));
    }

    // Read the input data into the managed buffers. This trimmed sample ships no data loader,
    // so a zero-filled placeholder is used; a real application would pass preprocessed image data.
    ASSERT(mParams.inputTensorNames.size() == 1);
    std::vector<float> fileData(mInputDims.d[1] * mInputDims.d[2] * mInputDims.d[3], 0.0F);
    if (!processInput(fileData.data(), buffers))
    {
        return false;
    }

    // Memcpy from host input buffers to device input buffers
    buffers.copyInputToDevice();

    bool status = context->executeV2(buffers.getDeviceBindings().data());
    if (!status)
    {
        return false;
    }

    // Memcpy from device output buffers to host output buffers
    buffers.copyOutputToHost();

    // The output is now available in the host buffers; result verification and
    // post-processing are omitted in this trimmed sample.
    return true;
}

//!
//! \brief Reads the input and stores the result in a managed buffer
//!
bool TensorRTEngine::processInput(float* fileData, const samplesCommon::BufferManager& buffers)
{
    const int inputC = mInputDims.d[1];
    const int inputH = mInputDims.d[2];
    const int inputW = mInputDims.d[3];

    // Copy data into the host-side input buffer
    float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
    for (int i = 0; i < inputC * inputH * inputW; i++)
    {
        hostDataBuffer[i] = fileData[i];
    }

    return true;
}

//!
//! \brief Initializes members of the params struct using the command line args
//!
samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args& args)
{
    samplesCommon::OnnxSampleParams params;
    if (args.dataDirs.empty()) // Use default directories if user hasn't provided directory paths
    {
        params.dataDirs.push_back("data/mnist/");
        params.dataDirs.push_back("data/samples/mnist/");
    }
    else // Use the data directory provided by the user
    {
        params.dataDirs = args.dataDirs;
    }
    params.onnxFileName = "mnist.onnx";
    params.inputTensorNames.push_back("Input3");
    params.outputTensorNames.push_back("Plus214_Output_0");
    params.dlaCore = args.useDLACore;
    params.int8 = args.runInInt8;
    params.fp16 = args.runInFp16;
    params.bf16 = args.runInBf16;

    return params;
}


int main(int argc, char** argv)
{
    samplesCommon::Args args;
    if (!samplesCommon::parseArgs(args, argc, argv))
    {
        sample::gLogError << "Invalid arguments" << std::endl;
        return EXIT_FAILURE;
    }

    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);

    sample::gLogger.reportTestStart(sampleTest);

    TensorRTEngine sample(initializeSampleParams(args));

    sample::gLogInfo << "Building and running a GPU inference engine for Onnx MNIST" << std::endl;

    if (!sample.build())
    {
        return sample::gLogger.reportFail(sampleTest);
    }
    if (!sample.infer())
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    return sample::gLogger.reportPass(sampleTest);
}
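
The zero-filled placeholder input in infer() marks the spot where an application would supply
real, preprocessed data. As a sketch only (it assumes the readPGMFile and locateFile helpers from
the TensorRT samples' common code and the per-digit .pgm files shipped with the MNIST sample
data), the placeholder could be replaced with something along these lines:

    // Load one MNIST digit image and normalize it the way the original
    // sampleOnnxMNIST does, then hand it to processInput().
    const int inputH = mInputDims.d[2];
    const int inputW = mInputDims.d[3];
    std::vector<uint8_t> pgmData(inputH * inputW);
    mNumber = rand() % 10; // pick a digit file such as "3.pgm"
    readPGMFile(locateFile(std::to_string(mNumber) + ".pgm", mParams.dataDirs),
        pgmData.data(), inputH, inputW);

    std::vector<float> fileData(inputH * inputW);
    for (int i = 0; i < inputH * inputW; i++)
    {
        fileData[i] = 1.0F - float(pgmData[i]) / 255.0F; // invert so ink is ~1 and background ~0
    }
    if (!processInput(fileData.data(), buffers))
    {
        return false;
    }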