Spaces:
Running
Running
| /** | |
| * NOTE: Convert model with --fp16 may lead to incorrect results | |
| */ | |
| do \ | |
| { \ | |
| const cudaError_t error_code = call; \ | |
| if (error_code != cudaSuccess) \ | |
| { \ | |
| printf("CUDA_CHECK Error:\n"); \ | |
| printf(" File: %s\n", __FILE__); \ | |
| printf(" Line: %d\n", __LINE__); \ | |
| printf(" Error code: %d\n", error_code); \ | |
| printf(" Error text: %s\n", cudaGetErrorString(error_code)); \ | |
| exit(1); \ | |
| } \ | |
| } while (0) | |
| struct Object | |
| { | |
| cv::Rect_<float> rect; | |
| int label; | |
| float prob; | |
| }; | |
| const std::vector<std::string> labels = { | |
| "Person", "Bicycle", "Car", "Motorcycle", "Airplane", "Bus", "Train", | |
| "Truck", "Boat", "Traffic light", "Fire hydrant", "Stop sign", "Parking meter", | |
| "Bench", "Bird", "Cat", "Dog", "Horse", "Sheep", "Cow", "Elephant", "Bear", | |
| "Zebra", "Giraffe", "Backpack", "Umbrella", "Handbag", "Tie", "Suitcase", | |
| "Frisbee", "Skis", "Snowboard", "Sports ball", "Kite", "Baseball bat", | |
| "Baseball glove", "Skateboard", "Surfboard", "Tennis racket", "Bottle", | |
| "Wine glass", "Cup", "Fork", "Knife", "Spoon", "Bowl", "Banana", "Apple", | |
| "Sandwich", "Orange", "Broccoli", "Carrot", "Hot dog", "Pizza", "Donut", | |
| "Cake", "Chair", "Couch", "Potted plant", "Bed", "Dining table", "Toilet", | |
| "Tv", "Laptop", "Mouse", "Remote", "Keyboard", "Cell phone", "Microwave", | |
| "Oven", "Toaster", "Sink", "Refrigerator", "Book", "Clock", "Vase", "Scissors", | |
| "Teddy bear", "Hair drier", "Toothbrush" | |
| }; | |
| class Logger : public nvinfer1::ILogger | |
| { | |
| public: | |
| void log(Severity severity, const char *msg) noexcept override | |
| { | |
| if (severity <= nvinfer1::ILogger::Severity::kWARNING) | |
| { | |
| std::cerr << "[TensorRT] "; | |
| switch (severity) | |
| { | |
| case nvinfer1::ILogger::Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break; | |
| case nvinfer1::ILogger::Severity::kERROR: std::cerr << "ERROR: "; break; | |
| case nvinfer1::ILogger::Severity::kWARNING: std::cerr << "WARNING: "; break; | |
| case nvinfer1::ILogger::Severity::kINFO: std::cerr << "INFO: "; break; | |
| case nvinfer1::ILogger::Severity::kVERBOSE: std::cerr << "VERBOSE: "; break; | |
| } | |
| std::cerr << msg << "\n"; | |
| } | |
| } | |
| }; | |
| static Logger logger; | |
| bool DrawObjects(cv::Mat &image, const std::vector<Object> &objects, | |
| const std::vector<std::string> &labels, bool isSilent) | |
| { | |
| for (auto obj : objects) | |
| { | |
| if (obj.label >= static_cast<int>(labels.size())) | |
| return false; | |
| if (isSilent != true) | |
| std::printf("%s = %.2f%% at (%.1f, %.1f) %.1f x %.1f\n", labels[obj.label].c_str(), obj.prob * 100.0f, | |
| obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); | |
| char text[256]; | |
| snprintf(text, sizeof(text), "%s %.1f%%", labels[obj.label].c_str(), obj.prob * 100.0f); | |
| auto scalar = cv::Scalar(255, 255, 255); | |
| cv::rectangle(image, obj.rect, scalar, 2); | |
| int baseLine = 5; | |
| cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.75, 1, &baseLine); | |
| int x = obj.rect.x - 1; | |
| int y = obj.rect.y - label_size.height - baseLine; | |
| y = std::max(0, y); | |
| if (x + label_size.width > image.cols) | |
| x = image.cols - label_size.width; | |
| cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), | |
| scalar, -1); | |
| cv::putText(image, text, cv::Point(x, y + label_size.height + baseLine / 2), | |
| cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 0, 0), 2); | |
| } | |
| return true; | |
| } | |
| size_t CountElement(const nvinfer1::Dims &dims) | |
| { | |
| int64_t total = 1; | |
| for (int32_t i = 0; i < dims.nbDims; ++i) | |
| total *= dims.d[i]; | |
| return static_cast<size_t>(total); | |
| } | |
| template <typename T> | |
| T Clamp(T val, T min, T max) | |
| { | |
| return val > min ? (val < max ? val : max) : min; | |
| } | |
| void GetLetterboxDimensions( | |
| const int img_rows, const int img_cols, | |
| const int target_size, | |
| int &resize_rows, int &resize_cols, int &pad_rows, int &pad_cols, float &scale | |
| ) | |
| { | |
| scale = static_cast<float>(target_size) / std::max(img_rows, img_cols); | |
| resize_rows = static_cast<int>(std::round(img_rows * scale)); | |
| resize_cols = static_cast<int>(std::round(img_cols * scale)); | |
| pad_rows = target_size - resize_rows; | |
| pad_cols = target_size - resize_cols; | |
| } | |
| int main(int argc, char *argv[]) | |
| { | |
| // --- Settings --- | |
| if (argc < 3) | |
| { | |
| std::printf("Usage: %s model image [conf] [target size]\n", argv[0]); | |
| return 0; | |
| } | |
| const std::string model_path = std::string(argv[1]); | |
| float conf_thres = 0.25f; | |
| int target_size = 640; | |
| if (argc >= 4 && std::stof(argv[3]) > 0.0f) | |
| conf_thres = std::stof(argv[3]); | |
| if (argc >= 5 && std::stoi(argv[4]) > 0 && | |
| std::stoi(argv[4]) % 32 == 0 && std::stoi(argv[4]) > 32) | |
| target_size = std::stoi(argv[4]); | |
| std::cout << "Model: " << model_path << "\n"; | |
| std::cout << "Input: " << argv[2] << "\n"; | |
| std::cout << "Conf: " << conf_thres << "\n"; | |
| std::cout << "Target size: " << target_size << "\n"; | |
| // --- Init TRT --- | |
| // load model data | |
| std::ifstream engine_file(model_path, std::ios::binary); | |
| if (!engine_file) | |
| { | |
| std::cerr << "Failed to open engine file\n"; | |
| return -1; | |
| } | |
| engine_file.seekg(0, engine_file.end); | |
| std::streamsize engine_size = engine_file.tellg(); | |
| engine_file.seekg(0, engine_file.beg); | |
| std::unique_ptr<char[]> engine_data{std::make_unique<char[]>(engine_size)}; | |
| if (!engine_file.read(engine_data.get(), engine_size)) | |
| { | |
| std::cerr << "Failed to read engine file\n"; | |
| return -1; | |
| } | |
| engine_file.close(); | |
| // create runtime, engine, context, and stream | |
| auto runtime{nvinfer1::createInferRuntime(logger)}; | |
| if (!runtime) | |
| { | |
| std::cerr << "Failed to create runtime\n"; | |
| return -1; | |
| } | |
| auto engine{runtime->deserializeCudaEngine(engine_data.get(), engine_size)}; | |
| if (!engine) | |
| { | |
| std::cerr << "Failed to deserialize engine\n"; | |
| return -1; | |
| } | |
| auto context{engine->createExecutionContext()}; | |
| if (!context) | |
| { | |
| std::cerr << "Failed to create contexts\n"; | |
| return -1; | |
| } | |
| std::unique_ptr<cudaStream_t> stream = std::make_unique<cudaStream_t>(); | |
| CUDA_CHECK(cudaStreamCreate(stream.get())); | |
| // get model info | |
| std::vector<std::pair<int, std::string>> in_tensor_info, out_tensor_info; | |
| for (int i = 0; i < engine->getNbIOTensors(); ++i) | |
| { | |
| const char *tensor_name = engine->getIOTensorName(i); | |
| nvinfer1::TensorIOMode io_mode = engine->getTensorIOMode(tensor_name); | |
| if (io_mode == nvinfer1::TensorIOMode::kINPUT) | |
| in_tensor_info.push_back({i, std::string(tensor_name)}); | |
| else if (io_mode == nvinfer1::TensorIOMode::kOUTPUT) | |
| out_tensor_info.push_back({i, std::string(tensor_name)}); | |
| } | |
| // create host memory | |
| size_t max_in0_size_byte = CountElement(context->getTensorShape(in_tensor_info[0].second.c_str())) * sizeof(float); | |
| size_t max_in1_size_byte = CountElement(context->getTensorShape(in_tensor_info[1].second.c_str())) * sizeof(int64_t); | |
| size_t max_out0_size_byte = CountElement(context->getTensorShape(out_tensor_info[0].second.c_str())) * sizeof(int64_t); | |
| size_t max_out1_size_byte = CountElement(context->getTensorShape(out_tensor_info[1].second.c_str())) * sizeof(float); | |
| size_t max_out2_size_byte = CountElement(context->getTensorShape(out_tensor_info[2].second.c_str())) * sizeof(float); | |
| std::vector<std::unique_ptr<unsigned char[]>> host_outs; | |
| host_outs.resize(out_tensor_info.size()); | |
| host_outs[0] = std::make_unique<unsigned char[]>(max_out0_size_byte); | |
| host_outs[1] = std::make_unique<unsigned char[]>(max_out1_size_byte); | |
| host_outs[2] = std::make_unique<unsigned char[]>(max_out2_size_byte); | |
| // create cuda memory | |
| std::vector<void *> buffers{}; | |
| buffers.resize(engine->getNbIOTensors()); | |
| CUDA_CHECK(cudaMalloc(&buffers[in_tensor_info[0].first], max_in0_size_byte)); | |
| CUDA_CHECK(cudaMalloc(&buffers[in_tensor_info[1].first], max_in1_size_byte)); | |
| CUDA_CHECK(cudaMalloc(&buffers[out_tensor_info[0].first], max_out0_size_byte)); | |
| CUDA_CHECK(cudaMalloc(&buffers[out_tensor_info[1].first], max_out1_size_byte)); | |
| CUDA_CHECK(cudaMalloc(&buffers[out_tensor_info[2].first], max_out2_size_byte)); | |
| // set in/out tensor address | |
| context->setInputTensorAddress(in_tensor_info[0].second.c_str(), buffers[in_tensor_info[0].first]); | |
| context->setInputTensorAddress(in_tensor_info[1].second.c_str(), buffers[in_tensor_info[1].first]); | |
| context->setOutputTensorAddress(out_tensor_info[0].second.c_str(), buffers[out_tensor_info[0].first]); | |
| context->setOutputTensorAddress(out_tensor_info[1].second.c_str(), buffers[out_tensor_info[1].first]); | |
| context->setOutputTensorAddress(out_tensor_info[2].second.c_str(), buffers[out_tensor_info[2].first]); | |
| // --- Detect --- | |
| cv::Mat image = cv::imread(argv[2]); | |
| if (image.empty()) | |
| { | |
| std::cout << "Failed to read image\n"; | |
| return -1; | |
| } | |
| // preprocessing | |
| int img_rows = image.rows; | |
| int img_cols = image.cols; | |
| float scale; | |
| int resize_rows, resize_cols, pad_rows, pad_cols; | |
| GetLetterboxDimensions( | |
| img_rows, img_cols, target_size, | |
| resize_rows, resize_cols, pad_rows, pad_cols, scale | |
| ); | |
| cv::Mat letterbox, blob; | |
| cv::resize(image, letterbox, cv::Size(resize_cols, resize_rows), 0, 0, cv::INTER_AREA); | |
| cv::copyMakeBorder( | |
| letterbox, letterbox, | |
| pad_rows / 2, pad_rows - pad_rows / 2, | |
| pad_cols / 2, pad_cols - pad_cols / 2, | |
| cv::BORDER_CONSTANT, cv::Scalar(114.0, 114.0, 114.0) | |
| ); | |
| // no normalization | |
| cv::dnn::blobFromImage(letterbox, blob, 1.0f / 255.0f, cv::Size(letterbox.cols, letterbox.rows), cv::Scalar(0, 0, 0), true, false, CV_32F); | |
| nvinfer1::Dims trt_in0_dims{}, trt_in1_dims{}; | |
| trt_in0_dims.nbDims = 4; | |
| trt_in0_dims.d[0] = 1; | |
| trt_in0_dims.d[1] = 3; | |
| trt_in0_dims.d[2] = letterbox.rows; | |
| trt_in0_dims.d[3] = letterbox.cols; | |
| context->setInputShape(in_tensor_info[0].second.c_str(), trt_in0_dims); | |
| std::vector<int64_t> orig_size{static_cast<int64_t>(letterbox.rows), static_cast<int64_t>(letterbox.cols)}; | |
| trt_in1_dims.nbDims = 2; | |
| trt_in1_dims.d[0] = 1; | |
| trt_in1_dims.d[1] = 2; | |
| context->setInputShape(in_tensor_info[1].second.c_str(), trt_in1_dims); | |
| // execute | |
| CUDA_CHECK(cudaMemcpyAsync(buffers[0], blob.data, max_in0_size_byte, cudaMemcpyHostToDevice, *stream)); | |
| CUDA_CHECK(cudaMemcpyAsync(buffers[1], orig_size.data(), max_in1_size_byte, cudaMemcpyHostToDevice, *stream)); | |
| context->enqueueV3(*stream); | |
| CUDA_CHECK(cudaMemcpyAsync(host_outs[0].get(), buffers[2], max_out0_size_byte, cudaMemcpyDeviceToHost, *stream)); | |
| CUDA_CHECK(cudaMemcpyAsync(host_outs[1].get(), buffers[3], max_out1_size_byte, cudaMemcpyDeviceToHost, *stream)); | |
| CUDA_CHECK(cudaMemcpyAsync(host_outs[2].get(), buffers[4], max_out2_size_byte, cudaMemcpyDeviceToHost, *stream)); | |
| CUDA_CHECK(cudaStreamSynchronize(*stream)); | |
| const int64_t *labels_ptr = reinterpret_cast<const int64_t *>(host_outs[0].get()); | |
| const float *boxes_ptr = reinterpret_cast<const float *>(host_outs[1].get()); | |
| const float *scores_ptr = reinterpret_cast<const float *>(host_outs[2].get()); | |
| size_t num_box = 300; | |
| size_t walk = 4; | |
| float dw = pad_cols / 2, dh = pad_rows / 2; | |
| std::vector<Object> objects; | |
| for (size_t i = 0; i < num_box; ++i) | |
| { | |
| if (scores_ptr[i] < conf_thres) | |
| continue; | |
| float x0 = boxes_ptr[i * walk]; | |
| float y0 = boxes_ptr[i * walk + 1]; | |
| float x1 = boxes_ptr[i * walk + 2]; | |
| float y1 = boxes_ptr[i * walk + 3]; | |
| x0 = (x0 - dw) / scale; | |
| y0 = (y0 - dh) / scale; | |
| x1 = (x1 - dw) / scale; | |
| y1 = (y1 - dh) / scale; | |
| x0 = Clamp(x0, 0.0f, static_cast<float>(img_cols)); | |
| y0 = Clamp(y0, 0.0f, static_cast<float>(img_rows)); | |
| x1 = Clamp(x1, x0, static_cast<float>(img_cols)); | |
| y1 = Clamp(y1, y0, static_cast<float>(img_rows)); | |
| Object object; | |
| object.rect.x = x0; | |
| object.rect.y = y0; | |
| object.rect.width = x1 - x0; | |
| object.rect.height = y1 - y0; | |
| object.prob = scores_ptr[i]; | |
| object.label = static_cast<int>(labels_ptr[i]); | |
| objects.emplace_back(object); | |
| } | |
| // save results | |
| if (DrawObjects(image, objects, labels, false)) | |
| cv::imwrite("./result.jpg", image); | |
| else | |
| std::cout << "Failed to draw objects\n"; | |
| // --- Release resources --- | |
| for (const auto &buffer : buffers) | |
| if (buffer) | |
| CUDA_CHECK(cudaFree(buffer)); | |
| if (stream && *stream) | |
| CUDA_CHECK(cudaStreamDestroy(*stream)); | |
| return 0; | |
| } | |