| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
#include <algorithm>
#include <cfloat>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include "keypoint_detector.h"
#include "picodet_mnn.h"
| |
|
| | #define __SAVE_RESULT__ |
| | |
| |
|
| | using namespace PaddleDetection; |
| |
|
/// Axis-aligned rectangle in integer pixel coordinates.
///
/// Used in this file to record which part of a padded (letterboxed) image is
/// covered by the actual, aspect-preserving resized picture.
///
/// All members default to zero so that a default-constructed rectangle never
/// carries indeterminate values.
struct object_rect {
  int x = 0;       ///< Left edge, in pixels.
  int y = 0;       ///< Top edge, in pixels.
  int width = 0;   ///< Horizontal extent, in pixels.
  int height = 0;  ///< Vertical extent, in pixels.
};
| |
|
| | int resize_uniform(cv::Mat& src, |
| | cv::Mat& dst, |
| | cv::Size dst_size, |
| | object_rect& effect_area) { |
| | int w = src.cols; |
| | int h = src.rows; |
| | int dst_w = dst_size.width; |
| | int dst_h = dst_size.height; |
| | dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0)); |
| |
|
| | float ratio_src = w * 1.0 / h; |
| | float ratio_dst = dst_w * 1.0 / dst_h; |
| |
|
| | int tmp_w = 0; |
| | int tmp_h = 0; |
| | if (ratio_src > ratio_dst) { |
| | tmp_w = dst_w; |
| | tmp_h = floor((dst_w * 1.0 / w) * h); |
| | } else if (ratio_src < ratio_dst) { |
| | tmp_h = dst_h; |
| | tmp_w = floor((dst_h * 1.0 / h) * w); |
| | } else { |
| | cv::resize(src, dst, dst_size); |
| | effect_area.x = 0; |
| | effect_area.y = 0; |
| | effect_area.width = dst_w; |
| | effect_area.height = dst_h; |
| | return 0; |
| | } |
| | cv::Mat tmp; |
| | cv::resize(src, tmp, cv::Size(tmp_w, tmp_h)); |
| |
|
| | if (tmp_w != dst_w) { |
| | int index_w = floor((dst_w - tmp_w) / 2.0); |
| | for (int i = 0; i < dst_h; i++) { |
| | memcpy(dst.data + i * dst_w * 3 + index_w * 3, |
| | tmp.data + i * tmp_w * 3, |
| | tmp_w * 3); |
| | } |
| | effect_area.x = index_w; |
| | effect_area.y = 0; |
| | effect_area.width = tmp_w; |
| | effect_area.height = tmp_h; |
| | } else if (tmp_h != dst_h) { |
| | int index_h = floor((dst_h - tmp_h) / 2.0); |
| | memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3); |
| | effect_area.x = 0; |
| | effect_area.y = index_h; |
| | effect_area.width = tmp_w; |
| | effect_area.height = tmp_h; |
| | } else { |
| | printf("error\n"); |
| | } |
| | return 0; |
| | } |
| |
|
// Drawing palette: one 3-component color per detection label. draw_bboxes
// looks entries up by `bbox.label` and passes the three values, in order, to
// cv::Scalar. The trailing comment on each row gives the label indices it holds.
const int color_list[80][3] = {
    {216,  82,  24}, {236, 176,  31}, {125,  46, 141}, {118, 171,  47},  //  0- 3
    { 76, 189, 237}, {238,  19,  46}, { 76,  76,  76}, {153, 153, 153},  //  4- 7
    {255,   0,   0}, {255, 127,   0}, {190, 190,   0}, {  0, 255,   0},  //  8-11
    {  0,   0, 255}, {170,   0, 255}, { 84,  84,   0}, { 84, 170,   0},  // 12-15
    { 84, 255,   0}, {170,  84,   0}, {170, 170,   0}, {170, 255,   0},  // 16-19
    {255,  84,   0}, {255, 170,   0}, {255, 255,   0}, {  0,  84, 127},  // 20-23
    {  0, 170, 127}, {  0, 255, 127}, { 84,   0, 127}, { 84,  84, 127},  // 24-27
    { 84, 170, 127}, { 84, 255, 127}, {170,   0, 127}, {170,  84, 127},  // 28-31
    {170, 170, 127}, {170, 255, 127}, {255,   0, 127}, {255,  84, 127},  // 32-35
    {255, 170, 127}, {255, 255, 127}, {  0,  84, 255}, {  0, 170, 255},  // 36-39
    {  0, 255, 255}, { 84,   0, 255}, { 84,  84, 255}, { 84, 170, 255},  // 40-43
    { 84, 255, 255}, {170,   0, 255}, {170,  84, 255}, {170, 170, 255},  // 44-47
    {170, 255, 255}, {255,   0, 255}, {255,  84, 255}, {255, 170, 255},  // 48-51
    { 42,   0,   0}, { 84,   0,   0}, {127,   0,   0}, {170,   0,   0},  // 52-55
    {212,   0,   0}, {255,   0,   0}, {  0,  42,   0}, {  0,  84,   0},  // 56-59
    {  0, 127,   0}, {  0, 170,   0}, {  0, 212,   0}, {  0, 255,   0},  // 60-63
    {  0,   0,  42}, {  0,   0,  84}, {  0,   0, 127}, {  0,   0, 170},  // 64-67
    {  0,   0, 212}, {  0,   0, 255}, {  0,   0,   0}, { 36,  36,  36},  // 68-71
    { 72,  72,  72}, {109, 109, 109}, {145, 145, 145}, {182, 182, 182},  // 72-75
    {218, 218, 218}, {  0, 113, 188}, { 80, 182, 188}, {127, 127,   0},  // 76-79
};
| |
|
| | void draw_bboxes(const cv::Mat& bgr, |
| | const std::vector<BoxInfo>& bboxes, |
| | object_rect effect_roi, |
| | std::string save_path = "None") { |
| | static const char* class_names[] = { |
| | "person", "bicycle", "car", |
| | "motorcycle", "airplane", "bus", |
| | "train", "truck", "boat", |
| | "traffic light", "fire hydrant", "stop sign", |
| | "parking meter", "bench", "bird", |
| | "cat", "dog", "horse", |
| | "sheep", "cow", "elephant", |
| | "bear", "zebra", "giraffe", |
| | "backpack", "umbrella", "handbag", |
| | "tie", "suitcase", "frisbee", |
| | "skis", "snowboard", "sports ball", |
| | "kite", "baseball bat", "baseball glove", |
| | "skateboard", "surfboard", "tennis racket", |
| | "bottle", "wine glass", "cup", |
| | "fork", "knife", "spoon", |
| | "bowl", "banana", "apple", |
| | "sandwich", "orange", "broccoli", |
| | "carrot", "hot dog", "pizza", |
| | "donut", "cake", "chair", |
| | "couch", "potted plant", "bed", |
| | "dining table", "toilet", "tv", |
| | "laptop", "mouse", "remote", |
| | "keyboard", "cell phone", "microwave", |
| | "oven", "toaster", "sink", |
| | "refrigerator", "book", "clock", |
| | "vase", "scissors", "teddy bear", |
| | "hair drier", "toothbrush"}; |
| |
|
| | cv::Mat image = bgr.clone(); |
| | int src_w = image.cols; |
| | int src_h = image.rows; |
| | int dst_w = effect_roi.width; |
| | int dst_h = effect_roi.height; |
| | float width_ratio = (float)src_w / (float)dst_w; |
| | float height_ratio = (float)src_h / (float)dst_h; |
| |
|
| | for (size_t i = 0; i < bboxes.size(); i++) { |
| | const BoxInfo& bbox = bboxes[i]; |
| | cv::Scalar color = cv::Scalar(color_list[bbox.label][0], |
| | color_list[bbox.label][1], |
| | color_list[bbox.label][2]); |
| | cv::rectangle(image, |
| | cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, |
| | (bbox.y1 - effect_roi.y) * height_ratio), |
| | cv::Point((bbox.x2 - effect_roi.x) * width_ratio, |
| | (bbox.y2 - effect_roi.y) * height_ratio)), |
| | color); |
| |
|
| | char text[256]; |
| | sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100); |
| |
|
| | int baseLine = 0; |
| | cv::Size label_size = |
| | cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); |
| |
|
| | int x = (bbox.x1 - effect_roi.x) * width_ratio; |
| | int y = |
| | (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine; |
| | if (y < 0) y = 0; |
| | if (x + label_size.width > image.cols) x = image.cols - label_size.width; |
| |
|
| | cv::rectangle( |
| | image, |
| | cv::Rect(cv::Point(x, y), |
| | cv::Size(label_size.width, label_size.height + baseLine)), |
| | color, |
| | -1); |
| |
|
| | cv::putText(image, |
| | text, |
| | cv::Point(x, y + label_size.height), |
| | cv::FONT_HERSHEY_SIMPLEX, |
| | 0.4, |
| | cv::Scalar(255, 255, 255)); |
| | } |
| |
|
| | if (save_path == "None") { |
| | cv::imshow("image", image); |
| | } else { |
| | cv::imwrite(save_path, image); |
| | std::cout << save_path << std::endl; |
| | } |
| | } |
| |
|
| | std::vector<BoxInfo> coordsback(const cv::Mat image, |
| | const object_rect effect_roi, |
| | const std::vector<BoxInfo>& bboxes) { |
| | int src_w = image.cols; |
| | int src_h = image.rows; |
| | int dst_w = effect_roi.width; |
| | int dst_h = effect_roi.height; |
| | float width_ratio = (float)src_w / (float)dst_w; |
| | float height_ratio = (float)src_h / (float)dst_h; |
| |
|
| | std::vector<BoxInfo> bboxes_oimg; |
| |
|
| | for (int i = 0; i < bboxes.size(); i++) { |
| | auto bbox = bboxes[i]; |
| | bbox.x1 = (bbox.x1 - effect_roi.x) * width_ratio; |
| | bbox.y1 = (bbox.y1 - effect_roi.y) * height_ratio; |
| | bbox.x2 = (bbox.x2 - effect_roi.x) * width_ratio; |
| | bbox.y2 = (bbox.y2 - effect_roi.y) * height_ratio; |
| | bboxes_oimg.emplace_back(bbox); |
| | } |
| | return bboxes_oimg; |
| | } |
| |
|
| | void image_infer_kpts(KeyPointDetector* kpts_detector, |
| | cv::Mat image, |
| | const object_rect effect_roi, |
| | const std::vector<BoxInfo>& results, |
| | std::string img_name = "kpts_vis", |
| | bool save_img = true) { |
| | std::vector<cv::Mat> cropimgs; |
| | std::vector<std::vector<float>> center_bs; |
| | std::vector<std::vector<float>> scale_bs; |
| | std::vector<KeyPointResult> kpts_results; |
| | auto results_oimg = coordsback(image, effect_roi, results); |
| |
|
| | for (int i = 0; i < results_oimg.size(); i++) { |
| | auto rect = results_oimg[i]; |
| | if (rect.label == 0) { |
| | cv::Mat cropimg; |
| | std::vector<float> center, scale; |
| | std::vector<int> area = {static_cast<int>(rect.x1), |
| | static_cast<int>(rect.y1), |
| | static_cast<int>(rect.x2), |
| | static_cast<int>(rect.y2)}; |
| | CropImg(image, cropimg, area, center, scale); |
| | |
| | cropimgs.emplace_back(cropimg); |
| | center_bs.emplace_back(center); |
| | scale_bs.emplace_back(scale); |
| | } |
| | if (cropimgs.size() == 1 || |
| | (cropimgs.size() > 0 && i == results_oimg.size() - 1)) { |
| | kpts_detector->Predict(cropimgs, center_bs, scale_bs, &kpts_results); |
| | cropimgs.clear(); |
| | center_bs.clear(); |
| | scale_bs.clear(); |
| | } |
| | } |
| | std::vector<int> compression_params; |
| | compression_params.push_back(cv::IMWRITE_JPEG_QUALITY); |
| | compression_params.push_back(95); |
| | std::string kpts_savepath = |
| | "keypoint_" + img_name.substr(img_name.find_last_of('/') + 1); |
| | cv::Mat kpts_vis_img = |
| | VisualizeKptsResult(image, kpts_results, {0, 255, 0}, 0.3); |
| | if (save_img) { |
| | cv::imwrite(kpts_savepath, kpts_vis_img, compression_params); |
| | printf("Visualized output saved as %s\n", kpts_savepath.c_str()); |
| | } else { |
| | cv::imshow("image", kpts_vis_img); |
| | } |
| | } |
| |
|
| | int image_demo(PicoDet& detector, |
| | KeyPointDetector* kpts_detector, |
| | const char* imagepath) { |
| | std::vector<cv::String> filenames; |
| | cv::glob(imagepath, filenames, false); |
| |
|
| | for (auto img_name : filenames) { |
| | cv::Mat image = cv::imread(img_name); |
| | if (image.empty()) { |
| | fprintf(stderr, "cv::imread %s failed\n", img_name.c_str()); |
| | return -1; |
| | } |
| | object_rect effect_roi; |
| | cv::Mat resized_img; |
| | resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); |
| | std::vector<BoxInfo> results; |
| | detector.detect(resized_img, results); |
| | if (kpts_detector) { |
| | image_infer_kpts(kpts_detector, image, effect_roi, results, img_name); |
| | } |
| | } |
| | return 0; |
| | } |
| |
|
| | int webcam_demo(PicoDet& detector, |
| | KeyPointDetector* kpts_detector, |
| | int cam_id) { |
| | cv::Mat image; |
| | cv::VideoCapture cap(cam_id); |
| |
|
| | while (true) { |
| | cap >> image; |
| | object_rect effect_roi; |
| | cv::Mat resized_img; |
| | resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); |
| | std::vector<BoxInfo> results; |
| | detector.detect(resized_img, results); |
| | if (kpts_detector) { |
| | image_infer_kpts(kpts_detector, image, effect_roi, results, "", false); |
| | } |
| | } |
| | return 0; |
| | } |
| |
|
| | int video_demo(PicoDet& detector, |
| | KeyPointDetector* kpts_detector, |
| | const char* path) { |
| | cv::Mat image; |
| | cv::VideoCapture cap(path); |
| |
|
| | while (true) { |
| | cap >> image; |
| | object_rect effect_roi; |
| | cv::Mat resized_img; |
| | resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); |
| | std::vector<BoxInfo> results; |
| | detector.detect(resized_img, results); |
| | if (kpts_detector) { |
| | image_infer_kpts(kpts_detector, image, effect_roi, results, "", false); |
| | } |
| | } |
| | return 0; |
| | } |
| |
|
| | int benchmark(KeyPointDetector* kpts_detector) { |
| | int loop_num = 100; |
| | int warm_up = 8; |
| |
|
| | double time_min = DBL_MAX; |
| | double time_max = -DBL_MAX; |
| | double time_avg = 0; |
| | cv::Mat image(256, 192, CV_8UC3, cv::Scalar(1, 1, 1)); |
| | std::vector<float> center = {128, 96}; |
| | std::vector<float> scale = {256, 192}; |
| | std::vector<cv::Mat> cropimgs = {image}; |
| | std::vector<std::vector<float>> center_bs = {center}; |
| | std::vector<std::vector<float>> scale_bs = {scale}; |
| | std::vector<KeyPointResult> kpts_results; |
| |
|
| | for (int i = 0; i < warm_up + loop_num; i++) { |
| | auto start = std::chrono::steady_clock::now(); |
| | std::vector<BoxInfo> results; |
| | kpts_detector->Predict(cropimgs, center_bs, scale_bs, &kpts_results); |
| | auto end = std::chrono::steady_clock::now(); |
| |
|
| | std::chrono::duration<double> elapsed = end - start; |
| | double time = elapsed.count(); |
| | if (i >= warm_up) { |
| | time_min = (std::min)(time_min, time); |
| | time_max = (std::max)(time_max, time); |
| | time_avg += time; |
| | } |
| | } |
| | time_avg /= loop_num; |
| | fprintf(stderr, |
| | "%20s min = %7.2f max = %7.2f avg = %7.2f\n", |
| | "tinypose", |
| | time_min, |
| | time_max, |
| | time_avg); |
| | return 0; |
| | } |
| |
|
| | int main(int argc, char** argv) { |
| | if (argc != 3) { |
| | fprintf(stderr, |
| | "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n " |
| | "For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; " |
| | "\n For benchmark, mode=3 path=0.\n", |
| | argv[0]); |
| | return -1; |
| | } |
| | PicoDet detector = |
| | PicoDet("../weight/picodet_m_416.mnn", 416, 416, 4, 0.45, 0.3); |
| | KeyPointDetector* kpts_detector = |
| | new KeyPointDetector("../weight/tinypose256.mnn", 4, 256, 192); |
| | int mode = atoi(argv[1]); |
| | switch (mode) { |
| | case 0: { |
| | int cam_id = atoi(argv[2]); |
| | webcam_demo(detector, kpts_detector, cam_id); |
| | break; |
| | } |
| | case 1: { |
| | const char* images = argv[2]; |
| | image_demo(detector, kpts_detector, images); |
| | break; |
| | } |
| | case 2: { |
| | const char* path = argv[2]; |
| | video_demo(detector, kpts_detector, path); |
| | break; |
| | } |
| | case 3: { |
| | benchmark(kpts_detector); |
| | break; |
| | } |
| | default: { |
| | fprintf(stderr, |
| | "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; " |
| | "\n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, " |
| | "mode=2; \n For benchmark, mode=3 path=0.\n", |
| | argv[0]); |
| | break; |
| | } |
| | } |
| | delete kpts_detector; |
| | kpts_detector = nullptr; |
| | } |
| |
|