API
 
minimal_trt.cpp
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <iostream>
#include <fstream>
#include <vector>

using namespace nvinfer1;
// Read a serialized TensorRT engine file into the caller's buffer
bool readEngineFile(const std::string& filename, std::vector<char>* buffer) {
    std::ifstream file(filename, std::ios::binary);
    if (!file) {
        std::cerr << "Error opening " << filename << std::endl;
        return false;
    }
    file.seekg(0, std::ios::end);
    buffer->resize(file.tellg());
    file.seekg(0, std::ios::beg);
    file.read(buffer->data(), buffer->size());
    return true;
}

// Logger for TensorRT info/warning/errors
class Logger : public ILogger {
    void log(Severity severity, const char* msg) noexcept override {
        if (severity <= Severity::kWARNING) {
            std::cout << msg << std::endl;
        }
    }
};

int main() {
    // Initialize the logger
    Logger logger;

    // Load the serialized TensorRT engine from disk
    // const std::string engineFile = "/data/users/xsup/MachineLearning/2024B_OnSky/Engines/test_model_20241114.trt";
    const std::string engineFile = "/data/users/xsup/MachineLearning/2024B_OnSky/Models/test_build_2024b";
    std::vector<char> engineData;
    if (!readEngineFile(engineFile, &engineData)) {
        return 1;
    }

    // Create the runtime and deserialize the engine
    IRuntime* runtime = createInferRuntime(logger);
    if (!runtime) {
        std::cerr << "Failed to createInferRuntime\n";
        return 1;
    }
    ICudaEngine* engine = runtime->deserializeCudaEngine(engineData.data(), engineData.size());
    if (!engine) {
        std::cerr << "Failed to deserialize CUDA engine from " << engineFile << "\n";
        return 1;
    }
    std::cout << "Deserialized CUDA engine from " << engineFile << "\n";
    IExecutionContext* context = engine->createExecutionContext();
    if (!context) {
        std::cerr << "Failed to create execution context\n";
        return 1;
    }

    // Host buffers sized for this engine's input and output tensors
    const int inputSize = 4 * 60 * 60; // 4 x 60 x 60 input tensor
    const int outputSize = 1564;       // total number of output elements

    float inputData[inputSize] = {0};  // fill with real input data as needed
    float outputData[outputSize] = {0};

    // Allocate device memory for input and output
    float* d_input = nullptr;
    float* d_output = nullptr;
    cudaMalloc((void**)&d_input, inputSize * sizeof(float));
    cudaMalloc((void**)&d_output, outputSize * sizeof(float));

    // Copy input data to device
    cudaMemcpy(d_input, inputData, inputSize * sizeof(float), cudaMemcpyHostToDevice);
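    // Note: every cudaMalloc/cudaMemcpy call returns a cudaError_t; a more
    // robust version would check each result against cudaSuccess.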

    // Run inference; buffers[] must follow the engine's binding-index order
    // (input binding first, then output, for this engine)
    void* buffers[] = {d_input, d_output};
    if (!context->executeV2(buffers)) {
        std::cerr << "executeV2 failed\n";
    }
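    // Note: executeV2 is synchronous; depending on the TensorRT version,
    // asynchronous variants (enqueueV2, or enqueueV3 with named I/O tensors
    // from 8.5 onward) allow overlapping inference with other stream work.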

    // Copy output data back to host
    cudaMemcpy(outputData, d_output, outputSize * sizeof(float), cudaMemcpyDeviceToHost);

    // Clean up
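    // (execution contexts should be released before their engine, and the
    // engine before the runtime that created it)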
    cudaFree(d_input);
    cudaFree(d_output);
    delete context;
    delete engine;
    delete runtime;

    // Output some results
    std::cout << "Inference done! Output[0] = " << outputData[0] << std::endl;

    return 0;
}
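
The buffer sizes above are hard-coded for one particular engine. As a minimal sketch (assuming a TensorRT 8.x build, where these accessors exist but are deprecated from 8.5 onward), the deserialized engine can report its own binding shapes instead, so the sizes need not be guessed:

for (int i = 0; i < engine->getNbBindings(); ++i) {
    Dims dims = engine->getBindingDimensions(i);
    int64_t count = 1;
    for (int d = 0; d < dims.nbDims; ++d) {
        count *= dims.d[d]; // a dimension of -1 indicates a dynamic axis
    }
    std::cout << engine->getBindingName(i)
              << (engine->bindingIsInput(i) ? " (input): " : " (output): ")
              << count << " elements" << std::endl;
}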