From b713bd41f75a29ff8d53c3d5c803457ec724fae3 Mon Sep 17 00:00:00 2001 From: lzzyzlbb <287246233@qq.com> Date: Wed, 8 Dec 2021 08:47:08 +0000 Subject: [PATCH 1/4] add c++ deploy for msvsr --- deploy/cpp/CMakeLists.txt | 246 +++++++++++++++++++++++ deploy/cpp/README.md | 54 +++++ deploy/cpp/cmake/yaml-cpp.cmake | 30 +++ deploy/cpp/docs/Jetson_build.md | 210 ++++++++++++++++++++ deploy/cpp/docs/linux_build.md | 121 ++++++++++++ deploy/cpp/docs/windows_vs2019_build.md | 152 ++++++++++++++ deploy/cpp/include/config_parser.h | 88 +++++++++ deploy/cpp/include/preprocess_op.h | 189 ++++++++++++++++++ deploy/cpp/include/vsr.h | 104 ++++++++++ deploy/cpp/scripts/build.sh | 81 ++++++++ deploy/cpp/src/main.cc | 230 +++++++++++++++++++++ deploy/cpp/src/preprocess_op.cc | 252 ++++++++++++++++++++++++ deploy/cpp/src/vsr.cc | 193 ++++++++++++++++++ 13 files changed, 1950 insertions(+) create mode 100644 deploy/cpp/CMakeLists.txt create mode 100644 deploy/cpp/README.md create mode 100644 deploy/cpp/cmake/yaml-cpp.cmake create mode 100644 deploy/cpp/docs/Jetson_build.md create mode 100755 deploy/cpp/docs/linux_build.md create mode 100644 deploy/cpp/docs/windows_vs2019_build.md create mode 100644 deploy/cpp/include/config_parser.h create mode 100644 deploy/cpp/include/preprocess_op.h create mode 100644 deploy/cpp/include/vsr.h create mode 100644 deploy/cpp/scripts/build.sh create mode 100644 deploy/cpp/src/main.cc create mode 100644 deploy/cpp/src/preprocess_op.cc create mode 100644 deploy/cpp/src/vsr.cc diff --git a/deploy/cpp/CMakeLists.txt b/deploy/cpp/CMakeLists.txt new file mode 100644 index 000000000..5784ffe31 --- /dev/null +++ b/deploy/cpp/CMakeLists.txt @@ -0,0 +1,246 @@ +cmake_minimum_required(VERSION 3.0) +project(PaddleObjectDetector CXX C) + +option(WITH_MKL "Compile demo with MKL/OpenBlas support,defaultuseMKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." ON) +option(WITH_TENSORRT "Compile demo with TensorRT." 
OFF) + +option(WITH_VSR "Whether to Compile vsr" ON) + +SET(PADDLE_DIR "" CACHE PATH "Location of libraries") +SET(PADDLE_LIB_NAME "" CACHE STRING "libpaddle_inference") +SET(OPENCV_DIR "" CACHE PATH "Location of libraries") +SET(CUDA_LIB "" CACHE PATH "Location of libraries") +SET(CUDNN_LIB "" CACHE PATH "Location of libraries") +SET(TENSORRT_INC_DIR "" CACHE PATH "Compile demo with TensorRT") +SET(TENSORRT_LIB_DIR "" CACHE PATH "Compile demo with TensorRT") + +include(cmake/yaml-cpp.cmake) + +include_directories("${CMAKE_SOURCE_DIR}/") +include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include") +link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib") + +if (WITH_VSR) + set(SRCS src/main.cc src/preprocess_op.cc src/vsr.cc) +endif() + +macro(safe_set_static_flag) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) +endmacro() + +if (WITH_MKL) + ADD_DEFINITIONS(-DUSE_MKL) +endif() + +if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "") + message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_influence_dir") +endif() +message("PADDLE_DIR IS:" ${PADDLE_DIR}) + +if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "") + message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv") +endif() + +include_directories("${CMAKE_SOURCE_DIR}/") +include_directories("${PADDLE_DIR}/") +include_directories("${PADDLE_DIR}/third_party/install/protobuf/include") +include_directories("${PADDLE_DIR}/third_party/install/glog/include") +include_directories("${PADDLE_DIR}/third_party/install/gflags/include") +include_directories("${PADDLE_DIR}/third_party/install/xxhash/include") +if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include") + include_directories("${PADDLE_DIR}/third_party/install/snappy/include") +endif() +if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include") + include_directories("${PADDLE_DIR}/third_party/install/snappystream/include") +endif() +include_directories("${PADDLE_DIR}/third_party/boost") +include_directories("${PADDLE_DIR}/third_party/eigen3") + +if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") + link_directories("${PADDLE_DIR}/third_party/install/snappy/lib") +endif() +if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") + link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib") +endif() + +link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib") +link_directories("${PADDLE_DIR}/third_party/install/glog/lib") +link_directories("${PADDLE_DIR}/third_party/install/gflags/lib") +link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib") +link_directories("${PADDLE_DIR}/paddle/lib/") +link_directories("${CMAKE_CURRENT_BINARY_DIR}") + + + +if (WIN32) + include_directories("${PADDLE_DIR}/paddle/fluid/inference") + include_directories("${PADDLE_DIR}/paddle/include") + link_directories("${PADDLE_DIR}/paddle/fluid/inference") + find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH) + +else () + find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH) + include_directories("${PADDLE_DIR}/paddle/include") + link_directories("${PADDLE_DIR}/paddle/lib") +endif () +include_directories(${OpenCV_INCLUDE_DIRS}) + +if (WIN32) + add_definitions("/DGOOGLE_GLOG_DLL_DECL=") + 
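  # Build against the static MSVC runtime (/MT, /MTd below) so the demo
  # matches the statically linked third-party libraries; yaml-cpp is built
  # with -DMSVC_SHARED_RT=OFF for the same reason (see cmake/yaml-cpp.cmake).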
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") +else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -o2 -fopenmp -std=c++11") + set(CMAKE_STATIC_LIBRARY_PREFIX "") +endif() + +# TODO let users define cuda lib path +if (WITH_GPU) + if (NOT DEFINED CUDA_LIB OR ${CUDA_LIB} STREQUAL "") + message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda-8.0/lib64") + endif() + if (NOT WIN32) + if (NOT DEFINED CUDNN_LIB) + message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn_v7.4/cuda/lib64") + endif() + endif(NOT WIN32) +endif() + + +if (NOT WIN32) + if (WITH_TENSORRT AND WITH_GPU) + include_directories("${TENSORRT_INC_DIR}/") + link_directories("${TENSORRT_LIB_DIR}/") + endif() +endif(NOT WIN32) + +if (NOT WIN32) + set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph") + if(EXISTS ${NGRAPH_PATH}) + include(GNUInstallDirs) + include_directories("${NGRAPH_PATH}/include") + link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}") + set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() +endif() + +if(WITH_MKL) + include_directories("${PADDLE_DIR}/third_party/install/mklml/include") + if (WIN32) + set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib + ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib) + else () + set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} + ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) + execute_process(COMMAND cp -r ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} /usr/lib) + endif () + set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn") + if(EXISTS ${MKLDNN_PATH}) + include_directories("${MKLDNN_PATH}/include") + if (WIN32) + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) + else () + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) + endif () + endif() +else() + set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) +endif() + + +if (WIN32) + if(EXISTS "${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(DEPS + ${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(DEPS + ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() +endif() + + +if (WIN32) + set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) +else() + set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) +endif() + +message("PADDLE_LIB_NAME:" ${PADDLE_LIB_NAME}) +message("DEPS:" $DEPS) + +if (NOT WIN32) + set(DEPS ${DEPS} + ${MATH_LIB} ${MKLDNN_LIB} + glog gflags protobuf z xxhash yaml-cpp + ) + if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") + set(DEPS ${DEPS} snappystream) + endif() + if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") + set(DEPS ${DEPS} snappy) + endif() +else() + set(DEPS ${DEPS} + ${MATH_LIB} ${MKLDNN_LIB} + glog gflags_static libprotobuf xxhash libyaml-cppmt) + set(DEPS ${DEPS} libcmt shlwapi) + if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") + set(DEPS ${DEPS} snappy) + endif() + if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") + set(DEPS ${DEPS} 
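        # snappystream is optional; it is linked only when the Paddle package ships it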
snappystream) + endif() +endif(NOT WIN32) + +if(WITH_GPU) + if(NOT WIN32) + if (WITH_TENSORRT) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() + set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX}) + else() + set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDNN_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() +endif() + +if (NOT WIN32) + set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread") + set(DEPS ${DEPS} ${EXTERNAL_LIB}) +endif() + +set(DEPS ${DEPS} ${OpenCV_LIBS}) +add_executable(main ${SRCS}) +ADD_DEPENDENCIES(main ext-yaml-cpp) +message("DEPS:" $DEPS) +target_link_libraries(main ${DEPS}) + +if (WIN32 AND WITH_MKL) + add_custom_command(TARGET main POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll + ) +endif() + +if (WIN32) + add_custom_command(TARGET main POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll + ) +endif() diff --git a/deploy/cpp/README.md b/deploy/cpp/README.md new file mode 100644 index 000000000..ffa5e251e --- /dev/null +++ b/deploy/cpp/README.md @@ -0,0 +1,54 @@ +# C++端预测部署 + + + +## 各环境编译部署教程 +- [Linux 编译部署](docs/linux_build.md) +- [Windows编译部署(使用Visual Studio 2019)](docs/windows_vs2019_build.md) +- [NV Jetson编译部署](docs/Jetson_build.md) + + +## C++部署总览 +[1.说明](#1说明) + +[2.主要目录和文件](#2主要目录和文件) + + +### 1.说明 + +本目录为用户提供一个跨平台的`C++`部署方案,让用户通过`PaddleDetection`训练的模型导出后,即可基于本项目快速运行,也可以快速集成代码结合到自己的项目实际应用中去。 + +主要设计的目标包括以下四点: +- 跨平台,支持在 `Windows` 和 `Linux` 完成编译、二次开发集成和部署运行 +- 可扩展性,支持用户针对新模型开发自己特殊的数据预处理等逻辑 +- 高性能,除了`PaddlePaddle`自身带来的性能优势,我们还针对图像检测的特点对关键步骤进行了性能优化 +- 支持各种不同检测模型结构,包括`Yolov3`/`Faster_RCNN`/`SSD`等 + +### 2.主要目录和文件 + +```bash +deploy/cpp +| +├── src +│ ├── main.cc # 集成代码示例, 程序入口 +│ ├── object_detector.cc # 模型加载和预测主要逻辑封装类实现 +│ └── preprocess_op.cc # 预处理相关主要逻辑封装实现 +| +├── include +│ ├── config_parser.h # 导出模型配置yaml文件解析 +│ ├── object_detector.h # 模型加载和预测主要逻辑封装类 +│ └── preprocess_op.h # 预处理相关主要逻辑类封装 +| +├── docs +│ ├── linux_build.md # Linux 编译指南 +│ └── windows_vs2019_build.md # Windows VS2019编译指南 +│ +├── build.sh # 编译命令脚本 +│ +├── CMakeList.txt # cmake编译入口文件 +| +├── CMakeSettings.json # Visual Studio 2019 CMake项目编译设置 +│ +└── cmake # 依赖的外部项目cmake(目前仅有yaml-cpp) + +``` diff --git a/deploy/cpp/cmake/yaml-cpp.cmake b/deploy/cpp/cmake/yaml-cpp.cmake new file mode 100644 index 000000000..7bc7f34d4 --- /dev/null +++ b/deploy/cpp/cmake/yaml-cpp.cmake @@ 
-0,0 +1,30 @@ + +find_package(Git REQUIRED) + +include(ExternalProject) + +message("${CMAKE_BUILD_TYPE}") + +ExternalProject_Add( + ext-yaml-cpp + URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip + URL_MD5 9542d6de397d1fbd649ed468cb5850e6 + CMAKE_ARGS + -DYAML_CPP_BUILD_TESTS=OFF + -DYAML_CPP_BUILD_TOOLS=OFF + -DYAML_CPP_INSTALL=OFF + -DYAML_CPP_BUILD_CONTRIB=OFF + -DMSVC_SHARED_RT=OFF + -DBUILD_SHARED_LIBS=OFF + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib + -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib + PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp" + # Disable install step + INSTALL_COMMAND "" + LOG_DOWNLOAD ON + LOG_BUILD 1 +) diff --git a/deploy/cpp/docs/Jetson_build.md b/deploy/cpp/docs/Jetson_build.md new file mode 100644 index 000000000..4f54738de --- /dev/null +++ b/deploy/cpp/docs/Jetson_build.md @@ -0,0 +1,210 @@ +# Jetson平台编译指南 + +## 说明 +`NVIDIA Jetson`设备是具有`NVIDIA GPU`的嵌入式设备,可以将目标检测算法部署到该设备上。本文档是在`Jetson`硬件上部署`PaddleDetection`模型的教程。 + +本文档以`Jetson TX2`硬件、`JetPack 4.3`版本为例进行说明。 + +`Jetson`平台的开发指南请参考[NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html). + +## Jetson环境搭建 +`Jetson`系统软件安装,请参考[NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html). + +* (1) 查看硬件系统的l4t的版本号 +``` +cat /etc/nv_tegra_release +``` +* (2) 根据硬件,选择硬件可安装的`JetPack`版本,硬件和`JetPack`版本对应关系请参考[jetpack-archive](https://developer.nvidia.com/embedded/jetpack-archive). + +* (3) 下载`JetPack`,请参考[NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html) 中的`Preparing a Jetson Developer Kit for Use`章节内容进行刷写系统镜像。 + +**注意**: 请在[jetpack-archive](https://developer.nvidia.com/embedded/jetpack-archive) 根据硬件选择适配的`JetPack`版本进行刷机。 + +## 下载或编译`Paddle`预测库 +本文档使用`Paddle`在`JetPack4.3`上预先编译好的预测库,请根据硬件在[安装与编译 Linux 预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 中选择对应版本的`Paddle`预测库。 + +这里选择[nv_jetson_cuda10_cudnn7.6_trt6(jetpack4.3)](https://paddle-inference-lib.bj.bcebos.com/2.0.0-nv-jetson-jetpack4.3-all/paddle_inference.tgz), `Paddle`版本`2.0.0-rc0`,`CUDA`版本`10.0`,`CUDNN`版本`7.6`,`TensorRT`版本`6`。 + +若需要自己在`Jetson`平台上自定义编译`Paddle`库,请参考文档[安装与编译 Linux 预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html) 的`NVIDIA Jetson嵌入式硬件预测库源码编译`部分内容。 + +### Step1: 下载代码 + + `git clone https://github.com/PaddlePaddle/PaddleDetection.git` + +**说明**:其中`C++`预测代码在`/root/projects/PaddleDetection/deploy/cpp` 目录,该目录不依赖任何`PaddleDetection`下其他目录。 + + +### Step2: 下载PaddlePaddle C++ 预测库 paddle_inference + +解压下载的[nv_jetson_cuda10_cudnn7.6_trt6(jetpack4.3)](https://paddle-inference-lib.bj.bcebos.com/2.0.1-nv-jetson-jetpack4.3-all/paddle_inference.tgz) 。 + +下载并解压后`/root/projects/paddle_inference`目录包含内容为: +``` +paddle_inference +├── paddle # paddle核心库和头文件 +| +├── third_party # 第三方依赖库和头文件 +| +└── version.txt # 版本和编译信息 +``` + +**注意:** 预编译库`nv-jetson-cuda10-cudnn7.6-trt6`使用的`GCC`版本是`7.5.0`,其他都是使用`GCC 4.8.5`编译的。使用高版本的GCC可能存在`ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)。 + + +### Step4: 编译 + +编译`cmake`的命令在`scripts/build.sh`中,请根据实际情况修改主要参数,其主要内容说明如下: + 
+注意,`TX2`平台的`CUDA`、`CUDNN`需要通过`JetPack`安装。 + +``` +# 是否使用GPU(即是否使用 CUDA) +WITH_GPU=ON + +# 是否使用MKL or openblas,TX2需要设置为OFF +WITH_MKL=OFF + +# 是否集成 TensorRT(仅WITH_GPU=ON 有效) +WITH_TENSORRT=ON + +# TensorRT 的include路径 +TENSORRT_INC_DIR=/usr/include/aarch64-linux-gnu + +# TensorRT 的lib路径 +TENSORRT_LIB_DIR=/usr/lib/aarch64-linux-gnu + +# Paddle 预测库路径 +PADDLE_DIR=/path/to/paddle_inference/ + +# Paddle 预测库名称 +PADDLE_LIB_NAME=paddle_inference + +# Paddle 的预测库是否使用静态库来编译 +# 使用TensorRT时,Paddle的预测库通常为动态库 +WITH_STATIC_LIB=OFF + +# CUDA 的 lib 路径 +CUDA_LIB=/usr/local/cuda-10.0/lib64 + +# CUDNN 的 lib 路径 +CUDNN_LIB=/usr/lib/aarch64-linux-gnu + +# 是否开启关键点模型预测功能 +WITH_KEYPOINT=ON + +# OPENCV_DIR 的路径 +# linux平台请下载:https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2,并解压到deps文件夹下 +# TX2平台请下载:https://paddlemodels.bj.bcebos.com/TX2_JetPack4.3_opencv_3.4.10_gcc7.5.0.zip,并解压到deps文件夹下 +OPENCV_DIR=/path/to/opencv + +# 请检查以上各个路径是否正确 + +# 以下无需改动 +cmake .. \ + -DWITH_GPU=${WITH_GPU} \ + -DWITH_MKL=OFF \ + -DWITH_TENSORRT=${WITH_TENSORRT} \ + -DTENSORRT_DIR=${TENSORRT_DIR} \ + -DPADDLE_DIR=${PADDLE_DIR} \ + -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \ + -DCUDA_LIB=${CUDA_LIB} \ + -DCUDNN_LIB=${CUDNN_LIB} \ + -DOPENCV_DIR=${OPENCV_DIR} \ + -DPADDLE_LIB_NAME={PADDLE_LIB_NAME} \ + -DWITH_KEYPOINT=${WITH_KEYPOINT} +make +``` + +例如设置如下: +``` +# 是否使用GPU(即是否使用 CUDA) +WITH_GPU=ON + +# 是否使用MKL or openblas +WITH_MKL=OFF + +# 是否集成 TensorRT(仅WITH_GPU=ON 有效) +WITH_TENSORRT=OFF + +# TensorRT 的include路径 +TENSORRT_INC_DIR=/usr/include/aarch64-linux-gnu + +# TensorRT 的lib路径 +TENSORRT_LIB_DIR=/usr/lib/aarch64-linux-gnu + +# Paddle 预测库路径 +PADDLE_DIR=/home/nvidia/PaddleDetection_infer/paddle_inference/ + +# Paddle 预测库名称 +PADDLE_LIB_NAME=paddle_inference + +# Paddle 的预测库是否使用静态库来编译 +# 使用TensorRT时,Paddle的预测库通常为动态库 +WITH_STATIC_LIB=OFF + +# CUDA 的 lib 路径 +CUDA_LIB=/usr/local/cuda-10.0/lib64 + +# CUDNN 的 lib 路径 +CUDNN_LIB=/usr/lib/aarch64-linux-gnu/ + +# 是否开启关键点模型预测功能 +WITH_KEYPOINT=ON +``` + +修改脚本设置好主要参数后,执行`build`脚本: + ```shell + sh ./scripts/build.sh + ``` + +### Step5: 预测及可视化 +编译成功后,预测入口程序为`build/main`其主要命令参数说明如下: +| 参数 | 说明 | +| ---- | ---- | +| --model_dir | 导出的检测预测模型所在路径 | +| --model_dir_keypoint | Option | 导出的关键点预测模型所在路径 | +| --image_file | 要预测的图片文件路径 | +| --image_dir | 要预测的图片文件夹路径 | +| --video_file | 要预测的视频文件路径 | +| --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)| +| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`| +| --gpu_id | 指定进行推理的GPU device id(默认值为0)| +| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)| +| --batch_size | 检测模型预测时的batch size,在指定`image_dir`时有效 | +| --batch_size_keypoint | 关键点模型预测时的batch size,默认为8 | +| --run_benchmark | 是否重复预测来进行benchmark测速 | +| --output_dir | 输出图片所在的文件夹, 默认为output | +| --use_mkldnn | CPU预测中是否开启MKLDNN加速 | +| --cpu_threads | 设置cpu线程数,默认为1 | +| --use_dark | 关键点模型输出预测是否使用DarkPose后处理,默认为true | + +**注意**: +- 优先级顺序:`camera_id` > `video_file` > `image_dir` > `image_file`。 +- --run_benchmark如果设置为True,则需要安装依赖`pip install pynvml psutil GPUtil`。 + + +`样例一`: +```shell +#不使用`GPU`测试图片 `/root/projects/images/test.jpeg` +./main --model_dir=/root/projects/models/yolov3_darknet --image_file=/root/projects/images/test.jpeg +``` + +图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。 + + +`样例二`: +```shell +#使用 `GPU`预测视频`/root/projects/videos/test.mp4` +./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU +``` +视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。 + +`样例三`: +```shell +#使用关键点模型与检测模型联合预测,使用 `GPU`预测 +#检测模型检测到的人送入关键点模型进行关键点预测 
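#(以下模型与图片路径均为示例,请替换为实际导出的模型目录和测试数据)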
+./main --model_dir=/root/projects/models/yolov3_darknet --model_dir_keypoint=/root/projects/models/hrnet_w32_256x192 --image_file=/root/projects/images/test.jpeg --device=GPU +``` + +## 性能测试 +benchmark请查看[BENCHMARK_INFER](../../BENCHMARK_INFER.md) diff --git a/deploy/cpp/docs/linux_build.md b/deploy/cpp/docs/linux_build.md new file mode 100755 index 000000000..6b8ed9a52 --- /dev/null +++ b/deploy/cpp/docs/linux_build.md @@ -0,0 +1,121 @@ +# Linux平台编译指南 + +## 说明 +本文档在 `Linux`平台使用`GCC 8.2`测试过,如果需要使用其他G++版本编译使用,则需要重新编译Paddle预测库,请参考: [从源码编译Paddle预测库](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html)。本文档使用的预置的opencv库是在ubuntu 16.04上用gcc4.8编译的,如果需要在ubuntu 16.04以外的系统环境编译,那么需自行编译opencv库。 + +## 前置条件 +* G++ 8.2 +* CUDA 9.0 / CUDA 10.1, cudnn 7+ (仅在使用GPU版本的预测库时需要) +* CMake 3.0+ + +请确保系统已经安装好上述基本软件,**下面所有示例以工作目录为 `/root/projects/`演示**。 + +### Step1: 下载代码 + + `git clone https://github.com/PaddlePaddle/PaddleGAN.git` + +**说明**:其中`C++`预测代码在`/root/projects/PaddleGAN/deploy/cpp` 目录,该目录不依赖任何`PaddleGAN`下其他目录。 + + +### Step2: 下载PaddlePaddle C++ 预测库 paddle_inference + +PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html) + + +下载并解压后`/root/projects/paddle_inference`目录包含内容为: +``` +paddle_inference +├── paddle # paddle核心库和头文件 +| +├── third_party # 第三方依赖库和头文件 +| +└── version.txt # 版本和编译信息 +``` + +**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译,使用高版本`GCC`可能存在 `ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)。 + + +### Step3: 编译 + +编译`cmake`的命令在`scripts/build.sh`中,请根据实际情况修改主要参数,其主要内容说明如下: + +``` +# 是否使用GPU(即是否使用 CUDA) +WITH_GPU=OFF + +# 使用MKL or openblas +WITH_MKL=ON + +# 是否集成 TensorRT(仅WITH_GPU=ON 有效) +WITH_TENSORRT=OFF + +# TensorRT 的include路径 +TENSORRT_LIB_DIR=/path/to/TensorRT/include + +# TensorRT 的lib路径 +TENSORRT_LIB_DIR=/path/to/TensorRT/lib + +# Paddle 预测库路径 +PADDLE_DIR=/path/to/paddle_inference + +# Paddle 预测库名称 +PADDLE_LIB_NAME=paddle_inference + +# CUDA 的 lib 路径 +CUDA_LIB=/path/to/cuda/lib + +# CUDNN 的 lib 路径 +CUDNN_LIB=/path/to/cudnn/lib + +# 是否开启关键点模型预测功能 +WITH_VSR=ON + +# 请检查以上各个路径是否正确 + +# 以下无需改动 +cmake .. 
\ + -DWITH_GPU=${WITH_GPU} \ + -DWITH_MKL=${WITH_MKL} \ + -DWITH_TENSORRT=${WITH_TENSORRT} \ + -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \ + -DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \ + -DPADDLE_DIR=${PADDLE_DIR} \ + -DCUDA_LIB=${CUDA_LIB} \ + -DCUDNN_LIB=${CUDNN_LIB} \ + -DOPENCV_DIR=${OPENCV_DIR} \ + -DPADDLE_LIB_NAME=${PADDLE_LIB_NAME} \ + -DWITH_KEYPOINT=${WITH_VSR} +make + +``` + +修改脚本设置好主要参数后,执行`build`脚本: + ```shell + sh ./scripts/build.sh + ``` + +**注意**: OPENCV依赖OPENBLAS,Ubuntu用户需确认系统是否已存在`libopenblas.so`。如未安装,可执行apt-get install libopenblas-dev进行安装。 + +### Step4: 预测及可视化 +编译成功后,预测入口程序为`build/main`其主要命令参数说明如下: +| 参数 | 说明 | +| ---- | ---- | +| --model_dir | 导出的检测预测模型所在路径 | +| --video_file | 要预测的视频文件路径 | +| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`| +| --gpu_id | 指定进行推理的GPU device id(默认值为0)| +| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)| +| --batch_size | 检测模型预测时的batch size,在指定`image_dir`时有效 | +| --batch_size_keypoint | 关键点模型预测时的batch size,默认为8 | +| --run_benchmark | 是否重复预测来进行benchmark测速 | +| --output_dir | 输出图片所在的文件夹, 默认为output | +| --use_mkldnn | CPU预测中是否开启MKLDNN加速 | +| --cpu_threads | 设置cpu线程数,默认为1 | +| --use_dark | 关键点模型输出预测是否使用DarkPose后处理,默认为true | + +`样例一`: +```shell +#使用 `GPU`预测视频`/root/projects/videos/test.mp4` +./build/main --model_dir=/root/projects/models/multistagevsrmodel_generator --config_dir=/root/projects/models/infer_cfg.yml --video_file=/root/projects/images/test.mp4 --device=GPU +``` +视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。 diff --git a/deploy/cpp/docs/windows_vs2019_build.md b/deploy/cpp/docs/windows_vs2019_build.md new file mode 100644 index 000000000..9c5a2d33d --- /dev/null +++ b/deploy/cpp/docs/windows_vs2019_build.md @@ -0,0 +1,152 @@ +# Visual Studio 2019 Community CMake 编译指南 + +Windows 平台下,我们使用`Visual Studio 2019 Community` 进行了测试。微软从`Visual Studio 2017`开始即支持直接管理`CMake`跨平台编译项目,但是直到`2019`才提供了稳定和完全的支持,所以如果你想使用CMake管理项目编译构建,我们推荐你使用`Visual Studio 2019`环境下构建。 + + +## 前置条件 +* Visual Studio 2019 (根据Paddle预测库所使用的VS版本选择,请参考 [Visual Studio 不同版本二进制兼容性](https://docs.microsoft.com/zh-cn/cpp/porting/binary-compat-2015-2017?view=vs-2019) ) +* CUDA 9.0 / CUDA 10.0,cudnn 7+ / TensoRT(仅在使用GPU版本的预测库时需要) +* CMake 3.0+ [CMake下载](https://cmake.org/download/) + +**特别注意:windows下预测库需要的TensorRT版本为:**。 + +| 预测库版本 | TensorRT版本 | +| ---- | ---- | +| cuda10.1_cudnn7.6_avx_mkl_trt6 | TensorRT-6.0.1.5 | +| cuda10.2_cudnn7.6_avx_mkl_trt7 | TensorRT-7.0.0.11 | +| cuda11.0_cudnn8.0_avx_mkl_trt7 | TensorRT-7.2.1.6 | + +请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。 + +**下面所有示例以工作目录为 `D:\projects`演示**。 + +### Step1: 下载代码 + +下载源代码 +```shell +git clone https://github.com/PaddlePaddle/PaddleDetection.git +``` + +**说明**:其中`C++`预测代码在`PaddleDetection/deploy/cpp` 目录,该目录不依赖任何`PaddleDetection`下其他目录。 + + +### Step2: 下载PaddlePaddle C++ 预测库 paddle_inference + +PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#windows) + +解压后`D:\projects\paddle_inference`目录包含内容为: +``` +paddle_inference +├── paddle # paddle核心库和头文件 +| +├── third_party # 第三方依赖库和头文件 +| +└── version.txt # 版本和编译信息 +``` + +### Step3: 安装配置OpenCV + +1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download) +2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv` +3. 配置环境变量,如下流程所示(如果使用全局绝对路径,可以不用设置环境变量) + - 我的电脑->属性->高级系统设置->环境变量 + - 在系统变量中找到Path(如没有,自行创建),并双击编辑 + - 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin` + +### Step4: 编译 + +1. 
进入到`cpp`文件夹 +``` +cd D:\projects\PaddleDetection\deploy\cpp +``` + +2. 使用CMake生成项目文件 + +编译参数的含义说明如下(带`*`表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**): + +| 参数名 | 含义 | +| ---- | ---- | +| *CUDA_LIB | CUDA的库路径 | +| *CUDNN_LIB | CUDNN的库路径 | +| OPENCV_DIR | OpenCV的安装路径, | +| PADDLE_DIR | Paddle预测库的路径 | +| PADDLE_LIB_NAME | Paddle 预测库名称 | + +**注意:** 1. 使用`CPU`版预测库,请把`WITH_GPU`的勾去掉 2. 如果使用的是`openblas`版本,请把`WITH_MKL`勾去掉 3.如无需使用关键点模型可以把`WITH_KEYPOINT`勾去掉 + +执行如下命令项目文件: +``` +cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=path_to_cuda_lib -DCUDNN_LIB=path_to_cudnn_lib -DPADDLE_DIR=path_to_paddle_lib -DPADDLE_LIB_NAME=paddle_inference -DOPENCV_DIR=path_to_opencv -DWITH_KEYPOINT=ON +``` + +例如: +``` +cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=D:\projects\packages\cuda10_0\lib\x64 -DCUDNN_LIB=D:\projects\packages\cuda10_0\lib\x64 -DPADDLE_DIR=D:\projects\packages\paddle_inference -DPADDLE_LIB_NAME=paddle_inference -DOPENCV_DIR=D:\projects\packages\opencv3_4_6 -DWITH_KEYPOINT=ON +``` + +3. 编译 +用`Visual Studio 16 2019`打开`cpp`文件夹下的`PaddleObjectDetector.sln`,将编译模式设置为`Release`,点击`生成`->`全部生成 + + +### Step5: 预测及可视化 + +上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: + +``` +cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release +``` +可执行文件`main`即为样例的预测程序,其主要的命令行参数如下: + +| 参数 | 说明 | +| ---- | ---- | +| --model_dir | 导出的检测预测模型所在路径 | +| --model_dir_keypoint | Option | 导出的关键点预测模型所在路径 | +| --image_file | 要预测的图片文件路径 | +| --image_dir | 要预测的图片文件夹路径 | +| --video_file | 要预测的视频文件路径 | +| --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)| +| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`| +| --gpu_id | 指定进行推理的GPU device id(默认值为0)| +| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)| +| --batch_size | 检测模型预测时的batch size,在指定`image_dir`时有效 | +| --batch_size_keypoint | 关键点模型预测时的batch size,默认为8 | +| --run_benchmark | 是否重复预测来进行benchmark测速 | +| --output_dir | 输出图片所在的文件夹, 默认为output | +| --use_mkldnn | CPU预测中是否开启MKLDNN加速 | +| --cpu_threads | 设置cpu线程数,默认为1 | +| --use_dark | 关键点模型输出预测是否使用DarkPose后处理,默认为true | + +**注意**: +(1)优先级顺序:`camera_id` > `video_file` > `image_dir` > `image_file`。 +(2)如果提示找不到`opencv_world346.dll`,把`D:\projects\packages\opencv3_4_6\build\x64\vc14\bin`文件夹下的`opencv_world346.dll`拷贝到`main.exe`文件夹下即可。 +(3)--run_benchmark如果设置为True,则需要安装依赖`pip install pynvml psutil GPUtil`。 + + +`样例一`: +```shell +#不使用`GPU`测试图片 `D:\\images\\test.jpeg` +.\main --model_dir=D:\\models\\yolov3_darknet --image_file=D:\\images\\test.jpeg +``` + +图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。 + + +`样例二`: +```shell +#使用`GPU`测试视频 `D:\\videos\\test.mp4` +.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU +``` + +视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。 + + +`样例三`: +```shell +#使用关键点模型与检测模型联合预测,使用 `GPU`预测 +#检测模型检测到的人送入关键点模型进行关键点预测 +.\main --model_dir=D:\\models\\yolov3_darknet --model_dir_keypoint=D:\\models\\hrnet_w32_256x192 --image_file=D:\\images\\test.jpeg --device=GPU +``` + + +## 性能测试 +Benchmark请查看[BENCHMARK_INFER](../../BENCHMARK_INFER.md) diff --git a/deploy/cpp/include/config_parser.h b/deploy/cpp/include/config_parser.h new file mode 100644 index 000000000..0d28da34c --- /dev/null +++ b/deploy/cpp/include/config_parser.h @@ -0,0 +1,88 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "yaml-cpp/yaml.h"

#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif

namespace PaddleGAN {

// Parser for the exported model's inference configuration (infer_cfg.yml)
class ConfigPaser {
 public:
  ConfigPaser() {}

  ~ConfigPaser() {}

  bool load_config(const std::string& config_dir) {
    // Load as a YAML::Node
    YAML::Node config;
    config = YAML::LoadFile(config_dir);

    // Get runtime mode : fluid, trt_fp32, trt_fp16, trt_int8
    if (config["mode"].IsDefined()) {
      mode_ = config["mode"].as<std::string>();
    } else {
      std::cerr << "Please set mode, "
                << "support value : fluid/trt_fp32/trt_fp16/trt_int8."
                << std::endl;
      return false;
    }

    // Get min_subgraph_size for TensorRT
    if (config["min_subgraph_size"].IsDefined()) {
      min_subgraph_size_ = config["min_subgraph_size"].as<int>();
    } else {
      std::cerr << "Please set min_subgraph_size." << std::endl;
      return false;
    }

    // Get the Preprocess op list for preprocessing
    if (config["Preprocess"].IsDefined()) {
      preprocess_info_ = config["Preprocess"];
    } else {
      std::cerr << "Please set Preprocess." << std::endl;
      return false;
    }

    // Get use_dynamic_shape for TensorRT
    if (config["use_dynamic_shape"].IsDefined()) {
      use_dynamic_shape_ = config["use_dynamic_shape"].as<bool>();
    } else {
      std::cerr << "Please set use_dynamic_shape." << std::endl;
      return false;
    }

    return true;
  }

  std::string mode_;
  int min_subgraph_size_;
  YAML::Node preprocess_info_;
  bool use_dynamic_shape_;
};

}  // namespace PaddleGAN

diff --git a/deploy/cpp/include/preprocess_op.h b/deploy/cpp/include/preprocess_op.h
new file mode 100644
index 000000000..4635dc17a
--- /dev/null
+++ b/deploy/cpp/include/preprocess_op.h
@@ -0,0 +1,189 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
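
// Preprocessing operators for the VSR deploy demo. Each op reads its
// parameters from the "Preprocess" list of the exported infer_cfg.yml and is
// applied to every input frame by Preprocessor::Run (see RUN_ORDER in
// src/preprocess_op.cc). A typical config entry looks like this (the values
// are illustrative, not taken from a real export):
//
//   Preprocess:
//   - type: NormalizeImage
//     mean: [0.0, 0.0, 0.0]
//     std: [1.0, 1.0, 1.0]
//     is_scale: true
//   - type: Permute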
+ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace PaddleGAN { + +// Object for storing all preprocessed data +class ImageBlob { + public: + // image width and height + std::vector im_shape_; + // Buffer for image data after preprocessing + std::vector im_data_; + // in net data shape(after pad) + std::vector in_net_shape_; + // Evaluation image width and height + //std::vector eval_im_size_f_; + // Scale factor for image size to origin image size + std::vector scale_factor_; +}; + +// Abstraction of preprocessing opration class +class PreprocessOp { + public: + virtual void Init(const YAML::Node& item) = 0; + virtual void Run(cv::Mat* im, ImageBlob* data) = 0; +}; + +class InitInfo : public PreprocessOp{ + public: + virtual void Init(const YAML::Node& item) {} + virtual void Run(cv::Mat* im, ImageBlob* data); +}; + +class NormalizeImage : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + mean_ = item["mean"].as>(); + scale_ = item["std"].as>(); + is_scale_ = item["is_scale"].as(); + } + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + // CHW or HWC + std::vector mean_; + std::vector scale_; + bool is_scale_; +}; + +class Permute : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) {} + virtual void Run(cv::Mat* im, ImageBlob* data); + +}; + +class Resize : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + interp_ = item["interp"].as(); + keep_ratio_ = item["keep_ratio"].as(); + target_size_ = item["target_size"].as>(); + } + + // Compute best resize scale for x-dimension, y-dimension + std::pair GenerateScale(const cv::Mat& im); + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + int interp_; + bool keep_ratio_; + std::vector target_size_; + std::vector in_net_shape_; +}; + +class LetterBoxResize : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + target_size_ = item["target_size"].as>(); + } + + float GenerateScale(const cv::Mat& im); + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + std::vector target_size_; + std::vector in_net_shape_; +}; +// Models with FPN need input shape % stride == 0 +class PadStride : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + stride_ = item["stride"].as(); + } + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + int stride_; +}; + +class TopDownEvalAffine : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + trainsize_ = item["trainsize"].as>(); + } + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + int interp_ = 1; + std::vector trainsize_; +}; + +void CropImg(cv::Mat &img, cv::Mat &crop_img, std::vector &area, std::vector ¢er, std::vector &scale, float expandratio=0.15); + +class Preprocessor { + public: + void Init(const YAML::Node& config_node) { + // initialize image info at first + ops_["InitInfo"] = std::make_shared(); + for (const auto& item : config_node) { + auto op_name = item["type"].as(); + + ops_[op_name] = CreateOp(op_name); + ops_[op_name]->Init(item); + } + } + + std::shared_ptr CreateOp(const std::string& name) { + if (name == "Resize") { + return std::make_shared(); + } else if (name == "LetterBoxResize") { + return std::make_shared(); + } else if (name == "Permute") { + return std::make_shared(); + } else if (name == "NormalizeImage") { + return std::make_shared(); + } else if (name == 
"PadStride") { + // use PadStride instead of PadBatch + return std::make_shared(); + } else if (name == "TopDownEvalAffine") { + return std::make_shared(); + } + std::cerr << "can not find function of OP: " << name << " and return: nullptr" << std::endl; + return nullptr; + } + + void Run(cv::Mat* im, ImageBlob* data); + + public: + static const std::vector RUN_ORDER; + + private: + std::unordered_map> ops_; +}; + +} // namespace PaddleDetection + diff --git a/deploy/cpp/include/vsr.h b/deploy/cpp/include/vsr.h new file mode 100644 index 000000000..b949ed05f --- /dev/null +++ b/deploy/cpp/include/vsr.h @@ -0,0 +1,104 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "paddle_inference_api.h" // NOLINT + +#include "include/preprocess_op.h" +#include "include/config_parser.h" + +using namespace paddle_infer; + +namespace PaddleGAN { + + +class VSR { + public: + explicit VSR(const std::string& model_dir, + const std::string& config_dir, + const std::string& device="CPU", + bool use_mkldnn=false, + int cpu_threads=1, + const std::string& run_mode="fluid", + const int batch_size=1, + const int gpu_id=0, + const int trt_min_shape=1, + const int trt_max_shape=1280, + const int trt_opt_shape=640, + bool trt_calib_mode=false) { + this->device_ = device; + this->gpu_id_ = gpu_id; + this->cpu_math_library_num_threads_ = cpu_threads; + this->use_mkldnn_ = use_mkldnn; + + this->trt_min_shape_ = trt_min_shape; + this->trt_max_shape_ = trt_max_shape; + this->trt_opt_shape_ = trt_opt_shape; + this->trt_calib_mode_ = trt_calib_mode; + config_.load_config(config_dir); + this->use_dynamic_shape_ = config_.use_dynamic_shape_; + this->min_subgraph_size_ = config_.min_subgraph_size_; + preprocessor_.Init(config_.preprocess_info_); + LoadModel(model_dir, batch_size, run_mode); + + } + + // Load Paddle inference model + void LoadModel( + const std::string& model_dir, + const int batch_size = 1, + const std::string& run_mode = "fluid"); + + // Run predictor + void Predict(const std::vector imgs, + const int warmup = 0, + const int repeats = 1, + std::vector* result = nullptr, + std::vector* times = nullptr); + + private: + std::string device_ = "CPU"; + int gpu_id_ = 0; + int cpu_math_library_num_threads_ = 1; + bool use_mkldnn_ = false; + int min_subgraph_size_ = 3; + bool use_dynamic_shape_ = false; + int trt_min_shape_ = 1; + int trt_max_shape_ = 1280; + int trt_opt_shape_ = 640; + bool trt_calib_mode_ = false; + // Preprocess image and copy data to input buffer + void Preprocess(const cv::Mat& image_mat); + // Postprocess result + std::shared_ptr predictor_; + Preprocessor preprocessor_; + ImageBlob inputs_; + float threshold_; + ConfigPaser config_; + +}; + +} // namespace PaddleGAN diff --git a/deploy/cpp/scripts/build.sh b/deploy/cpp/scripts/build.sh new file mode 100644 index 000000000..c9bc82bb4 --- /dev/null +++ 
b/deploy/cpp/scripts/build.sh @@ -0,0 +1,81 @@ +# 是否使用GPU(即是否使用 CUDA) +WITH_GPU=OFF + +# 是否使用MKL or openblas,TX2需要设置为OFF +WITH_MKL=ON + +# 是否集成 TensorRT(仅WITH_GPU=ON 有效) +WITH_TENSORRT=OFF + +# paddle 预测库lib名称,由于不同平台不同版本预测库lib名称不同,请查看所下载的预测库中`paddle_inference/lib/`文件夹下`lib`的名称 +PADDLE_LIB_NAME=libpaddle_inference + +# TensorRT 的include路径 +TENSORRT_INC_DIR=/path/to/tensorrt/include + +# TensorRT 的lib路径 +TENSORRT_LIB_DIR=/path/to/tensorrt/lib + +# Paddle 预测库路径 +PADDLE_DIR=../../../../paddle_inference + +# CUDA 的 lib 路径 +CUDA_LIB=/usr/local/cuda-9.0/targets/x86_64-linux/lib/ + +# CUDNN 的 lib 路径 +CUDNN_LIB=/usr/lib/x86_64-linux-gnu/ + +WITH_VSR=ON + +MACHINE_TYPE=`uname -m` +echo "MACHINE_TYPE: "${MACHINE_TYPE} + + +if [ "$MACHINE_TYPE" = "x86_64" ] +then + echo "set OPENCV_DIR for x86_64" + # linux系统通过以下命令下载预编译的opencv + mkdir -p $(pwd)/deps && cd $(pwd)/deps + wget -c https://paddledet.bj.bcebos.com/data/opencv-3.4.16_gcc8.2_ffmpeg.tar.gz + tar -xvf opencv-3.4.16_gcc8.2_ffmpeg.tar.gz && cd .. + + # set OPENCV_DIR + OPENCV_DIR=$(pwd)/deps/opencv-3.4.16_gcc8.2_ffmpeg + +elif [ "$MACHINE_TYPE" = "aarch64" ] +then + echo "set OPENCV_DIR for aarch64" + # TX2平台通过以下命令下载预编译的opencv + mkdir -p $(pwd)/deps && cd $(pwd)/deps + wget -c https://bj.bcebos.com/v1/paddledet/data/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz + tar -xvf TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz && cd .. + + # set OPENCV_DIR + OPENCV_DIR=$(pwd)/deps/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0/ + +else + echo "Please set OPENCV_DIR manually" +fi + +echo "OPENCV_DIR: "$OPENCV_DIR + +# 以下无需改动 +rm -rf build +mkdir -p build +cd build +cmake .. \ + -DWITH_GPU=${WITH_GPU} \ + -DWITH_MKL=${WITH_MKL} \ + -DWITH_TENSORRT=${WITH_TENSORRT} \ + -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \ + -DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \ + -DPADDLE_DIR=${PADDLE_DIR} \ + -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \ + -DCUDA_LIB=${CUDA_LIB} \ + -DCUDNN_LIB=${CUDNN_LIB} \ + -DOPENCV_DIR=${OPENCV_DIR} \ + -DPADDLE_LIB_NAME=${PADDLE_LIB_NAME} \ + -DWITH_MOT=${WITH_VSR} + +make +echo "make finished!" diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc new file mode 100644 index 000000000..231cf7374 --- /dev/null +++ b/deploy/cpp/src/main.cc @@ -0,0 +1,230 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
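//
// Entry point of the MSVSR demo: frames are read from --video_file in groups
// of --frame_num, super-resolved by PaddleGAN::VSR::Predict, and written to
// <output_dir>/output.mp4. A typical invocation (paths are placeholders):
//
//   ./build/main --model_dir=/path/to/multistagevsrmodel_generator \
//     --config_dir=/path/to/infer_cfg.yml --video_file=input.mp4 --device=GPU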

#include <glog/logging.h>

#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#ifdef _WIN32
#include <direct.h>
#include <io.h>
#elif LINUX
#include <stdarg.h>
#include <sys/stat.h>
#endif

#include "include/vsr.h"
#include <gflags/gflags.h>

DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(config_dir, "", "Path of inference model config");
DEFINE_int32(batch_size, 1, "batch_size");
DEFINE_int32(frame_num, 2, "frame_num");
DEFINE_string(video_file, "", "Path of input video.");
DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU");
DEFINE_int32(cpu_threads, 1, "Num of threads with CPU");
DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeInfo");
DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeInfo");
DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeInfo");
DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quantitative calibration, trt_calib_mode need to set True");

void PrintBenchmarkLog(std::vector<double> det_time, int img_num) {
  LOG(INFO) << "----------------------- Config info -----------------------";
  LOG(INFO) << "runtime_device: " << FLAGS_device;
  LOG(INFO) << "ir_optim: " << "True";
  LOG(INFO) << "enable_memory_optim: " << "True";
  // std::string::find returns npos when the substring is absent; compare
  // against npos instead of squeezing the result into a signed int.
  bool has_trt = FLAGS_run_mode.find("trt") != std::string::npos;
  if (has_trt) {
    LOG(INFO) << "enable_tensorrt: " << "True";
    std::string precision = FLAGS_run_mode.substr(4);  // strip leading "trt_"
    LOG(INFO) << "precision: " << precision;
  } else {
    LOG(INFO) << "enable_tensorrt: " << "False";
    LOG(INFO) << "precision: " << "fp32";
  }
  LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
  LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads;
  LOG(INFO) << "----------------------- Data info -----------------------";
  LOG(INFO) << "batch_size: " << FLAGS_batch_size;
  LOG(INFO) << "input_shape: " << "dynamic shape";
  LOG(INFO) << "----------------------- Model info -----------------------";
  LOG(INFO) << "model_name: " << FLAGS_model_dir;
  LOG(INFO) << "----------------------- Perf info ------------------------";
  LOG(INFO) << "Total number of predicted data: " << img_num
            << " and total time spent(ms): "
            << std::accumulate(det_time.begin(), det_time.end(), 0.);
  LOG(INFO) << "preprocess_time(ms): " << det_time[0] / img_num
            << ", inference_time(ms): " << det_time[1] / img_num;
  // VSR::Predict only records preprocess and inference time; guard the
  // optional postprocess entry instead of indexing past the end.
  if (det_time.size() > 2) {
    LOG(INFO) << "postprocess_time(ms): " << det_time[2];
  }
}

static std::string DirName(const std::string& filepath) {
  auto pos = filepath.rfind(OS_PATH_SEP);
  if (pos == std::string::npos) {
    return "";
  }
  return filepath.substr(0, pos);
}

static bool PathExists(const std::string& path) {
#ifdef _WIN32
  struct _stat buffer;
  return (_stat(path.c_str(), &buffer) == 0);
#else
  struct stat buffer;
  return (stat(path.c_str(), &buffer) == 0);
#endif  // !_WIN32
}

static void MkDir(const std::string& path) {
  if (PathExists(path)) return;
  int ret = 0;
#ifdef _WIN32
  ret = _mkdir(path.c_str());
#else
  ret = mkdir(path.c_str(), 0755);
#endif  // !_WIN32
  if (ret != 0) {
    std::string path_error(path);
    path_error += " mkdir failed!";
    throw std::runtime_error(path_error);
  }
}

static void MkDirs(const std::string& path) {
  if (path.empty()) return;
  if (PathExists(path)) return;

  MkDirs(DirName(path));
  MkDir(path);
}

void PredictVideo(const std::string& video_path,
                  PaddleGAN::VSR* vsr,
                  const std::string& output_dir = "output") {
  // Open video
  cv::VideoCapture capture;
  std::string video_out_name = "output.mp4";
  capture.open(video_path.c_str());
  if (!capture.isOpened()) {
    printf("can not open video : %s\n", video_path.c_str());
    return;
  }

  // Get video info: resolution, fps, frame count
  int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
  int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
  int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
  int video_frame_count = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_COUNT));
  printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count);

  // Create VideoWriter for output
  cv::VideoWriter video_out;
  std::string video_out_path(output_dir);
  if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
    video_out_path += OS_PATH_SEP;
  }
  video_out_path += video_out_name;

  // NOTE: the writer frame size must match the frames actually written.
  // msvsr upscales by 4x; adjust the factor if your exported model differs.
  int sr_scale = 4;
  video_out.open(video_out_path.c_str(),
                 0x00000021,  // MPEG-4 codec tag for OpenCV's FFmpeg backend
                 video_fps,
                 cv::Size(video_width * sr_scale, video_height * sr_scale),
                 true);
  if (!video_out.isOpened()) {
    printf("create video writer failed!\n");
    return;
  }

  std::vector<double> det_times;
  // Capture frames in groups of `frame_num` and do inference
  cv::Mat frame;
  int frame_id = 1;
  bool read_end = false;
  while (!read_end) {
    std::vector<cv::Mat> imgs;
    for (int i = 0; i < FLAGS_frame_num; i++) {
      if (capture.read(frame) && !frame.empty()) {
        // Clone: VideoCapture may reuse the same buffer for every frame
        imgs.push_back(frame.clone());
      } else {
        read_end = true;
      }
    }
    // A trailing group with fewer than `frame_num` frames is dropped,
    // since the model expects a fixed number of frames per batch.
    if (read_end) {
      break;
    }
    std::vector<cv::Mat> result;
    vsr->Predict(imgs, 0, 1, &result, &det_times);
    for (const auto& item : result) {
      cv::Mat temp = cv::Mat::zeros(item.size(), CV_8UC3);
      // The network emits float pixels (nominally in [0, 1]);
      // rescale to 8-bit for the video writer.
      item.convertTo(temp, CV_8UC3, 255);
video_out.write(temp); + + } + frame_id += FLAGS_frame_num; + } + capture.release(); + video_out.release(); +} + +int main(int argc, char** argv) { + // Parsing command-line + google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_model_dir.empty() + || FLAGS_video_file.empty()) { + std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ " + << "--video_file=/PATH/TO/INPUT/VIDEO/" << std::endl; + return -1; + } + if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" + || FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { + std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; + return -1; + } + transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper); + if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) { + std::cout << "device should be 'CPU', 'GPU' or 'XPU'."; + return -1; + } + if (FLAGS_use_gpu) { + std::cout << "Deprecated, please use `--device` to set the device you want to run."; + return -1; + } + // Load model and create a vsr + PaddleGAN::VSR vsr(FLAGS_model_dir, FLAGS_config_dir, FLAGS_device, FLAGS_use_mkldnn, + FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id, + FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, + FLAGS_trt_calib_mode); + // Do inference on input video or image + if (!PathExists(FLAGS_output_dir)) { + MkDirs(FLAGS_output_dir); + } + PredictVideo(FLAGS_video_file, &vsr, FLAGS_output_dir); + return 0; +} diff --git a/deploy/cpp/src/preprocess_op.cc b/deploy/cpp/src/preprocess_op.cc new file mode 100644 index 000000000..6dd36027c --- /dev/null +++ b/deploy/cpp/src/preprocess_op.cc @@ -0,0 +1,252 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
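
// Implementations of the preprocessing ops declared in
// include/preprocess_op.h. Preprocessor::Run applies ops in the fixed
// RUN_ORDER defined below, not in the order they appear in the YAML config.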
+ +#include +#include +#include + +#include "include/preprocess_op.h" + +namespace PaddleGAN { + +void InitInfo::Run(cv::Mat* im, ImageBlob* data) { + data->im_shape_ = { + static_cast(im->rows), + static_cast(im->cols) + }; + data->scale_factor_ = {1., 1.}; + data->in_net_shape_ = { + static_cast(im->rows), + static_cast(im->cols) + }; +} + +void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) { + double e = 1.0; + if (is_scale_) { + e /= 255.0; + } + (*im).convertTo(*im, CV_32FC3, e); + for (int h = 0; h < im->rows; h++) { + for (int w = 0; w < im->cols; w++) { + im->at(h, w)[0] = + (im->at(h, w)[0] - mean_[0] ) / scale_[0]; + im->at(h, w)[1] = + (im->at(h, w)[1] - mean_[1] ) / scale_[1]; + im->at(h, w)[2] = + (im->at(h, w)[2] - mean_[2] ) / scale_[2]; + } + } +} + +void Permute::Run(cv::Mat* im, ImageBlob* data) { + (*im).convertTo(*im, CV_32FC3); + int rh = im->rows; + int rw = im->cols; + int rc = im->channels(); + (data->im_data_).resize(rc * rh * rw); + float* base = (data->im_data_).data(); + for (int i = 0; i < rc; ++i) { + cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i); + } +} + +void Resize::Run(cv::Mat* im, ImageBlob* data) { + auto resize_scale = GenerateScale(*im); + data->im_shape_ = { + static_cast(im->cols * resize_scale.first), + static_cast(im->rows * resize_scale.second) + }; + data->in_net_shape_ = { + static_cast(im->cols * resize_scale.first), + static_cast(im->rows * resize_scale.second) + }; + cv::resize( + *im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_); + data->im_shape_ = { + static_cast(im->rows), + static_cast(im->cols), + }; + data->scale_factor_ = { + resize_scale.second, + resize_scale.first, + }; +} + + +std::pair Resize::GenerateScale(const cv::Mat& im) { + std::pair resize_scale; + int origin_w = im.cols; + int origin_h = im.rows; + + if (keep_ratio_) { + int im_size_max = std::max(origin_w, origin_h); + int im_size_min = std::min(origin_w, origin_h); + int target_size_max = *std::max_element(target_size_.begin(), target_size_.end()); + int target_size_min = *std::min_element(target_size_.begin(), target_size_.end()); + float scale_min = + static_cast(target_size_min) / static_cast(im_size_min); + float scale_max = + static_cast(target_size_max) / static_cast(im_size_max); + float scale_ratio = std::min(scale_min, scale_max); + resize_scale = {scale_ratio, scale_ratio}; + } else { + resize_scale.first = + static_cast(target_size_[1]) / static_cast(origin_w); + resize_scale.second = + static_cast(target_size_[0]) / static_cast(origin_h); + } + return resize_scale; +} + +void LetterBoxResize::Run(cv::Mat* im, ImageBlob* data) { + float resize_scale = GenerateScale(*im); + int new_shape_w = std::round(im->cols * resize_scale); + int new_shape_h = std::round(im->rows * resize_scale); + data->im_shape_ = { + static_cast(new_shape_h), + static_cast(new_shape_w) + }; + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + + cv::resize( + *im, *im, cv::Size(new_shape_w, new_shape_h), 0, 0, cv::INTER_AREA); + + data->in_net_shape_ = { + static_cast(im->rows), + static_cast(im->cols), + }; + cv::copyMakeBorder( + *im, + *im, + top, + bottom, + left, + right, + cv::BORDER_CONSTANT, + cv::Scalar(127.5)); + + data->in_net_shape_ = { + static_cast(im->rows), + static_cast(im->cols), + }; + + data->scale_factor_ = { 
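      // Letterbox resizing preserves the aspect ratio, so one scale serves both axes.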
+ resize_scale, + resize_scale, + }; + + +} + +float LetterBoxResize::GenerateScale(const cv::Mat& im) { + int origin_w = im.cols; + int origin_h = im.rows; + + int target_h = target_size_[0]; + int target_w = target_size_[1]; + + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + return resize_scale; +} + +void PadStride::Run(cv::Mat* im, ImageBlob* data) { + if (stride_ <= 0) { + return; + } + int rc = im->channels(); + int rh = im->rows; + int rw = im->cols; + int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_; + int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_; + cv::copyMakeBorder( + *im, + *im, + 0, + nh - rh, + 0, + nw - rw, + cv::BORDER_CONSTANT, + cv::Scalar(0)); + data->in_net_shape_ = { + static_cast(im->rows), + static_cast(im->cols), + }; +} + +void TopDownEvalAffine::Run(cv::Mat* im, ImageBlob* data) { + cv::resize( + *im, *im, cv::Size(trainsize_[0],trainsize_[1]), 0, 0, interp_); + // todo: Simd::ResizeBilinear(); + data->in_net_shape_ = { + static_cast(trainsize_[1]), + static_cast(trainsize_[0]), + }; +} + +// Preprocessor op running order +const std::vector Preprocessor::RUN_ORDER = { + "InitInfo", "TopDownEvalAffine", "Resize", "LetterBoxResize", "NormalizeImage", "PadStride", "Permute" +}; + +void Preprocessor::Run(cv::Mat* im, ImageBlob* data) { + for (const auto& name : RUN_ORDER) { + if (ops_.find(name) != ops_.end()) { + ops_[name]->Run(im, data); + } + } +} + +void CropImg(cv::Mat &img, cv::Mat &crop_img, std::vector &area, std::vector ¢er, std::vector &scale, float expandratio) { + int crop_x1 = std::max(0, area[0]); + int crop_y1 = std::max(0, area[1]); + int crop_x2 = std::min(img.cols -1, area[2]); + int crop_y2 = std::min(img.rows - 1, area[3]); + int center_x = (crop_x1 + crop_x2)/2.; + int center_y = (crop_y1 + crop_y2)/2.; + int half_h = (crop_y2 - crop_y1)/2.; + int half_w = (crop_x2 - crop_x1)/2.; + + //adjust h or w to keep image ratio, expand the shorter edge + if (half_h*3 > half_w*4){ + half_w = static_cast(half_h*0.75); + } + else{ + half_h = static_cast(half_w*4/3); + } + + crop_x1 = std::max(0, center_x - static_cast(half_w*(1+expandratio))); + crop_y1 = std::max(0, center_y - static_cast(half_h*(1+expandratio))); + crop_x2 = std::min(img.cols -1, static_cast(center_x + half_w*(1+expandratio))); + crop_y2 = std::min(img.rows - 1, static_cast(center_y + half_h*(1+expandratio))); + crop_img = img(cv::Range(crop_y1, crop_y2+1), cv::Range(crop_x1, crop_x2 + 1)); + + center.clear(); + center.emplace_back((crop_x1+crop_x2)/2); + center.emplace_back((crop_y1+crop_y2)/2); + + scale.clear(); + scale.emplace_back((crop_x2-crop_x1)); + scale.emplace_back((crop_y2-crop_y1)); +} + +} // namespace PaddleGAN diff --git a/deploy/cpp/src/vsr.cc b/deploy/cpp/src/vsr.cc new file mode 100644 index 000000000..75dd82db5 --- /dev/null +++ b/deploy/cpp/src/vsr.cc @@ -0,0 +1,193 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+void CropImg(cv::Mat& img,
+             cv::Mat& crop_img,
+             std::vector<int>& area,
+             std::vector<float>& center,
+             std::vector<float>& scale,
+             float expandratio) {
+  int crop_x1 = std::max(0, area[0]);
+  int crop_y1 = std::max(0, area[1]);
+  int crop_x2 = std::min(img.cols - 1, area[2]);
+  int crop_y2 = std::min(img.rows - 1, area[3]);
+  int center_x = (crop_x1 + crop_x2) / 2.;
+  int center_y = (crop_y1 + crop_y2) / 2.;
+  int half_h = (crop_y2 - crop_y1) / 2.;
+  int half_w = (crop_x2 - crop_x1) / 2.;
+
+  // adjust h or w to keep the image ratio, expanding the shorter edge
+  if (half_h * 3 > half_w * 4) {
+    half_w = static_cast<int>(half_h * 0.75);
+  } else {
+    half_h = static_cast<int>(half_w * 4 / 3);
+  }
+
+  crop_x1 = std::max(0, center_x - static_cast<int>(half_w * (1 + expandratio)));
+  crop_y1 = std::max(0, center_y - static_cast<int>(half_h * (1 + expandratio)));
+  crop_x2 = std::min(img.cols - 1,
+                     static_cast<int>(center_x + half_w * (1 + expandratio)));
+  crop_y2 = std::min(img.rows - 1,
+                     static_cast<int>(center_y + half_h * (1 + expandratio)));
+  crop_img =
+      img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
+
+  center.clear();
+  center.emplace_back((crop_x1 + crop_x2) / 2);
+  center.emplace_back((crop_y1 + crop_y2) / 2);
+
+  scale.clear();
+  scale.emplace_back(crop_x2 - crop_x1);
+  scale.emplace_back(crop_y2 - crop_y1);
+}
+
+}  // namespace PaddleGAN
diff --git a/deploy/cpp/src/vsr.cc b/deploy/cpp/src/vsr.cc
new file mode 100644
index 000000000..75dd82db5
--- /dev/null
+++ b/deploy/cpp/src/vsr.cc
@@ -0,0 +1,193 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+// for setprecision
+#include <iomanip>
+#include <chrono>
+#include <iostream>
+#include <numeric>
+
+#include "include/vsr.h"
+
+using namespace paddle_infer;
+
+namespace PaddleGAN {
+
+// Load the model and create the predictor
+void VSR::LoadModel(const std::string& model_dir,
+                    const int batch_size,
+                    const std::string& run_mode) {
+  paddle_infer::Config config;
+  std::string prog_file = model_dir + ".pdmodel";
+  std::string params_file = model_dir + ".pdiparams";
+  config.SetModel(prog_file, params_file);
+  if (this->device_ == "GPU") {
+    config.EnableUseGpu(200, this->gpu_id_);
+    config.SwitchIrOptim(true);
+    // use tensorrt
+    if (run_mode != "fluid") {
+      auto precision = paddle_infer::Config::Precision::kFloat32;
+      if (run_mode == "trt_fp32") {
+        precision = paddle_infer::Config::Precision::kFloat32;
+      } else if (run_mode == "trt_fp16") {
+        precision = paddle_infer::Config::Precision::kHalf;
+      } else if (run_mode == "trt_int8") {
+        precision = paddle_infer::Config::Precision::kInt8;
+      } else {
+        // unknown run_mode: keep the default kFloat32 precision
+        printf("run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'\n");
+      }
+      // set tensorrt
+      config.EnableTensorRtEngine(
+          1 << 30,
+          batch_size,
+          this->min_subgraph_size_,
+          precision,
+          false,
+          this->trt_calib_mode_);
+
+      // set use dynamic shape
+      if (this->use_dynamic_shape_) {
+        // set dynamic shape for the image tensor
+        const std::vector<int> min_input_shape = {
+            1, 3, this->trt_min_shape_, this->trt_min_shape_};
+        const std::vector<int> max_input_shape = {
+            1, 3, this->trt_max_shape_, this->trt_max_shape_};
+        const std::vector<int> opt_input_shape = {
+            1, 3, this->trt_opt_shape_, this->trt_opt_shape_};
+        const std::map<std::string, std::vector<int>> map_min_input_shape = {
+            {"image", min_input_shape}};
+        const std::map<std::string, std::vector<int>> map_max_input_shape = {
+            {"image", max_input_shape}};
+        const std::map<std::string, std::vector<int>> map_opt_input_shape = {
+            {"image", opt_input_shape}};
+
+        config.SetTRTDynamicShapeInfo(map_min_input_shape,
+                                      map_max_input_shape,
+                                      map_opt_input_shape);
+        std::cout << "TensorRT dynamic shape enabled" << std::endl;
+      }
+    }
+
+  } else if (this->device_ == "XPU") {
+    config.EnableXpu(10 * 1024 * 1024);
+  } else {
+    config.DisableGpu();
+    if (this->use_mkldnn_) {
+      config.EnableMKLDNN();
+      // cache 10 different shapes for mkldnn to avoid memory leak
+      config.SetMkldnnCacheCapacity(10);
+    }
+    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
+  }
+  config.SwitchUseFeedFetchOps(false);
+  config.SwitchIrOptim(true);
+  config.DisableGlogInfo();
+  // Memory optimization
+  config.EnableMemoryOptim();
+  predictor_ = CreatePredictor(config);
+}
+
+void VSR::Preprocess(const cv::Mat& ori_im) {
+  // Clone the image: keep the original mat for postprocess
+  cv::Mat im = ori_im.clone();
+  cv::cvtColor(im, im, cv::COLOR_BGR2RGB);
+  preprocessor_.Run(&im, &inputs_);
+}
+
+void VSR::Predict(const std::vector<cv::Mat> imgs,
+                  const int warmup,
+                  const int repeats,
+                  std::vector<cv::Mat>* result,
+                  std::vector<double>* times) {
+  auto preprocess_start = std::chrono::steady_clock::now();
+  int frames_num = imgs.size();
+  // batched input data
+  std::vector<float> in_data_all;
+  std::vector<float> im_shape_all(frames_num * 2);
+  std::vector<float> scale_factor_all(frames_num * 2);
+
+  // Preprocess image
+  for (int bs_idx = 0; bs_idx < frames_num; bs_idx++) {
+    cv::Mat im = imgs.at(bs_idx);
+    Preprocess(im);
+    im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
+    im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
+
+    scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
+    scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
+
+    // TODO: reduce the copy cost
+    in_data_all.insert(
+        in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
+  }
+  auto preprocess_end = std::chrono::steady_clock::now();
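+  // Prepare the input tensor. The exported MSVSR model consumes a single 5-D
+  // float tensor laid out as {batch, frames, channels, height, width}, so the
+  // per-frame CHW planes produced by Permute are concatenated into one
+  // contiguous buffer above and reshaped here.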
+  auto input_names = predictor_->GetInputNames();
+  for (const auto& tensor_name : input_names) {
+    auto in_tensor = predictor_->GetInputHandle(tensor_name);
+    int rh = inputs_.in_net_shape_[0];
+    int rw = inputs_.in_net_shape_[1];
+    // NCHW-style layout: height comes before width
+    in_tensor->Reshape({1, frames_num, 3, rh, rw});
+    in_tensor->CopyFromCpu(in_data_all.data());
+  }
+  // warmup
+  for (int i = 0; i < warmup; i++) {
+    predictor_->Run();
+    // Get output tensor
+    auto output_names = predictor_->GetOutputNames();
+    for (size_t j = 0; j < output_names.size(); j++) {
+      auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
+      std::vector<int> output_shape = output_tensor->shape();
+      int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
+                                    1, std::multiplies<int>());
+      std::vector<float> out_data;
+      out_data.resize(out_num);
+      output_tensor->CopyToCpu(out_data.data());
+    }
+  }
+
+  auto inference_start = std::chrono::steady_clock::now();
+  for (int i = 0; i < repeats; i++) {
+    predictor_->Run();
+    // Get output tensor
+    auto output_names = predictor_->GetOutputNames();
+    for (size_t j = 0; j < output_names.size(); j++) {
+      auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
+      std::vector<int> output_shape = output_tensor->shape();
+      int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
+                                    1, std::multiplies<int>());
+      std::vector<float> out_data;
+      out_data.resize(out_num);
+      output_tensor->CopyToCpu(out_data.data());
+      // The output is {batch, frames, channels, height, width}; rebuild one
+      // cv::Mat per frame.
+      int pix_num = output_shape[3] * output_shape[4];
+      int frame_pix_num = pix_num * 3;
+      for (int frame = 0; frame < output_shape[1]; frame++) {
+        // Allocate a fresh Mat for every frame: cv::Mat copies share their
+        // data buffer, so reusing one Mat would overwrite frames already
+        // pushed into result.
+        cv::Mat res =
+            cv::Mat::zeros(output_shape[3], output_shape[4], CV_32FC3);
+        int index = 0;
+        for (int h = 0; h < output_shape[3]; ++h) {
+          for (int w = 0; w < output_shape[4]; ++w) {
+            // reorder the RGB planes into OpenCV's BGR pixel order
+            res.at<cv::Vec3f>(h, w) = {
+                out_data[2 * pix_num + index + frame_pix_num * frame],
+                out_data[pix_num + index + frame_pix_num * frame],
+                out_data[index + frame_pix_num * frame]};
+            index += 1;
+          }
+        }
+        result->push_back(res);
+      }
+    }
+  }
+  auto inference_end = std::chrono::steady_clock::now();
+
+  std::chrono::duration<float> preprocess_diff =
+      preprocess_end - preprocess_start;
+  times->push_back(double(preprocess_diff.count() * 1000));
+  std::chrono::duration<float> inference_diff = inference_end - inference_start;
+  times->push_back(double(inference_diff.count() / repeats * 1000));
+}
+
+}  // namespace PaddleGAN

From dfba7f90578c77c2443c3f02201f01170864efe8 Mon Sep 17 00:00:00 2001
From: lzzyzlbb <287246233@qq.com>
Date: Wed, 8 Dec 2021 08:47:51 +0000
Subject: [PATCH 2/4] add c++ deploy for msvsr

---
 deploy/cpp/docs/Jetson_build.md         | 210 ------------------------
 deploy/cpp/docs/windows_vs2019_build.md | 152 -----------------
 2 files changed, 362 deletions(-)
 delete mode 100644 deploy/cpp/docs/Jetson_build.md
 delete mode 100644 deploy/cpp/docs/windows_vs2019_build.md

diff --git a/deploy/cpp/docs/Jetson_build.md b/deploy/cpp/docs/Jetson_build.md
deleted file mode 100644
index 4f54738de..000000000
--- a/deploy/cpp/docs/Jetson_build.md
+++ /dev/null
@@ -1,210 +0,0 @@
-# Jetson Platform Build Guide
-
-## Overview
-`NVIDIA Jetson` devices are embedded devices equipped with an `NVIDIA GPU`, and object detection models can be deployed onto them. This document is a tutorial for deploying `PaddleDetection` models on `Jetson` hardware.
-
-This document uses `Jetson TX2` hardware with `JetPack 4.3` as the example.
-
-For the `Jetson` platform development guide, see the [NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html).
-
-## Setting up the Jetson environment
-For installing the `Jetson` system software, see the [NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html).
-
-* (1) Check the l4t version of the hardware:
-```
-cat /etc/nv_tegra_release
-```
-* (2) Based on the hardware, choose an installable `JetPack` version; for the hardware/`JetPack` version mapping, see [jetpack-archive](https://developer.nvidia.com/embedded/jetpack-archive).
-
-* (3) Download `JetPack` and flash the system image following the `Preparing a Jetson Developer Kit for Use` chapter of the [NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html).
-
-**Note**: pick the `JetPack` version matching your hardware from [jetpack-archive](https://developer.nvidia.com/embedded/jetpack-archive) before flashing.
-
-## Download or build the `Paddle` inference library
-This document uses the `Paddle` inference library prebuilt for `JetPack4.3`; pick the library version matching your hardware from [Build and install the Linux inference library](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/build_and_install_lib_cn.html).
-
-Here we use [nv_jetson_cuda10_cudnn7.6_trt6(jetpack4.3)](https://paddle-inference-lib.bj.bcebos.com/2.0.0-nv-jetson-jetpack4.3-all/paddle_inference.tgz): `Paddle` version `2.0.0-rc0`, `CUDA` `10.0`, `CUDNN` `7.6`, `TensorRT` `6`.
-
-To build a custom `Paddle` library on the `Jetson` platform yourself, see the `NVIDIA Jetson embedded hardware inference library source build` section of [Build and install the Linux inference library](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html).
-
-### Step1: Download the code
-
- `git clone https://github.com/PaddlePaddle/PaddleDetection.git`
-
-**Note**: the `C++` inference code lives in the `/root/projects/PaddleDetection/deploy/cpp` directory, which does not depend on any other directory of `PaddleDetection`.
-
-
-### Step2: Download the PaddlePaddle C++ inference library paddle_inference
-
-Unpack the downloaded [nv_jetson_cuda10_cudnn7.6_trt6(jetpack4.3)](https://paddle-inference-lib.bj.bcebos.com/2.0.1-nv-jetson-jetpack4.3-all/paddle_inference.tgz).
-
-After extraction, the `/root/projects/paddle_inference` directory contains:
-```
-paddle_inference
-├── paddle # paddle core library and headers
-|
-├── third_party # third-party dependencies and headers
-|
-└── version.txt # version and build info
-```
-
-**Note:** the prebuilt library `nv-jetson-cuda10-cudnn7.6-trt6` is built with `GCC` `7.5.0`; all others are built with `GCC 4.8.5`. A newer GCC may run into `ABI` compatibility problems; downgrade, or [build the inference library yourself](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html).
-
-
-### Step3: Compile
-
-The `cmake` command is wrapped in `scripts/build.sh`; adjust its main parameters to your setup. They are described below.
-
-Note: on the `TX2` platform, `CUDA` and `CUDNN` must be installed via `JetPack`.
-
-```
-# Whether to use GPU (i.e. CUDA)
-WITH_GPU=ON
-
-# Whether to use MKL or openblas; must be OFF on TX2
-WITH_MKL=OFF
-
-# Whether to integrate TensorRT (only effective with WITH_GPU=ON)
-WITH_TENSORRT=ON
-
-# TensorRT include path
-TENSORRT_INC_DIR=/usr/include/aarch64-linux-gnu
-
-# TensorRT lib path
-TENSORRT_LIB_DIR=/usr/lib/aarch64-linux-gnu
-
-# Path to the Paddle inference library
-PADDLE_DIR=/path/to/paddle_inference/
-
-# Name of the Paddle inference library
-PADDLE_LIB_NAME=paddle_inference
-
-# Whether to link the Paddle inference library statically
-# With TensorRT the Paddle library is usually a shared library
-WITH_STATIC_LIB=OFF
-
-# CUDA lib path
-CUDA_LIB=/usr/local/cuda-10.0/lib64
-
-# CUDNN lib path
-CUDNN_LIB=/usr/lib/aarch64-linux-gnu
-
-# Whether to enable keypoint model inference
-WITH_KEYPOINT=ON
-
-# OPENCV_DIR path
-# for Linux, download https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2 and unpack it into the deps folder
-# for TX2, download https://paddlemodels.bj.bcebos.com/TX2_JetPack4.3_opencv_3.4.10_gcc7.5.0.zip and unpack it into the deps folder
-OPENCV_DIR=/path/to/opencv
-
-# please double-check all the paths above
-
-# no changes needed below
-cmake .. \
-  -DWITH_GPU=${WITH_GPU} \
-  -DWITH_MKL=OFF \
-  -DWITH_TENSORRT=${WITH_TENSORRT} \
-  -DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \
-  -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \
-  -DPADDLE_DIR=${PADDLE_DIR} \
-  -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
-  -DCUDA_LIB=${CUDA_LIB} \
-  -DCUDNN_LIB=${CUDNN_LIB} \
-  -DOPENCV_DIR=${OPENCV_DIR} \
-  -DPADDLE_LIB_NAME=${PADDLE_LIB_NAME} \
-  -DWITH_KEYPOINT=${WITH_KEYPOINT}
-make
-```
-
-An example configuration:
-```
-# Whether to use GPU (i.e. CUDA)
-WITH_GPU=ON
-
-# Whether to use MKL or openblas
-WITH_MKL=OFF
-
-# Whether to integrate TensorRT (only effective with WITH_GPU=ON)
-WITH_TENSORRT=OFF
-
-# TensorRT include path
-TENSORRT_INC_DIR=/usr/include/aarch64-linux-gnu
-
-# TensorRT lib path
-TENSORRT_LIB_DIR=/usr/lib/aarch64-linux-gnu
-
-# Path to the Paddle inference library
-PADDLE_DIR=/home/nvidia/PaddleDetection_infer/paddle_inference/
-
-# Name of the Paddle inference library
-PADDLE_LIB_NAME=paddle_inference
-
-# Whether to link the Paddle inference library statically
-# With TensorRT the Paddle library is usually a shared library
-WITH_STATIC_LIB=OFF
-
-# CUDA lib path
-CUDA_LIB=/usr/local/cuda-10.0/lib64
-
-# CUDNN lib path
-CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
-
-# Whether to enable keypoint model inference
-WITH_KEYPOINT=ON
-```
-
-After setting the main parameters in the script, run the `build` script:
- ```shell
- sh ./scripts/build.sh
- ```
-
-### Step4: Inference and visualization
-After a successful build, the inference entry program is `build/main`; its main command-line parameters are:
-| Parameter | Description |
-| ---- | ---- |
-| --model_dir | path of the exported detection model |
-| --model_dir_keypoint | (Optional) path of the exported keypoint model |
-| --image_file | path of the image to predict |
-| --image_dir | path of the image directory to predict |
-| --video_file | path of the video to predict |
-| --camera_id | (Optional) camera ID used for prediction; default -1 (do not use a camera) |
-| --device | runtime device, one of `CPU/GPU/XPU`; default `CPU` |
-| --gpu_id | GPU device id used for inference (default 0) |
-| --run_mode | with GPU, default fluid; options: fluid/trt_fp32/trt_fp16/trt_int8 |
-| --batch_size | batch size for detection inference; effective when `image_dir` is given |
-| --batch_size_keypoint | batch size for keypoint inference; default 8 |
-| --run_benchmark | whether to repeat inference to benchmark speed |
-| --output_dir | folder for output images; default output |
-| --use_mkldnn | whether to enable MKLDNN acceleration for CPU inference |
-| --cpu_threads | number of CPU threads; default 1 |
-| --use_dark | whether to apply DarkPose post-processing to keypoint outputs; default true |
-
-**Notes**:
-- Priority order: `camera_id` > `video_file` > `image_dir` > `image_file`.
-- If --run_benchmark is set to True, install the dependencies first: `pip install pynvml psutil GPUtil`.
-
-
-`Example 1`:
-```shell
-# predict image /root/projects/images/test.jpeg without GPU
-./main --model_dir=/root/projects/models/yolov3_darknet --image_file=/root/projects/images/test.jpeg
-```
-
-The `visualized result` for an image is saved as `output.jpg` in the current directory.
-
-
-`Example 2`:
-```shell
-# predict video /root/projects/videos/test.mp4 with GPU
-./main --model_dir=/root/projects/models/yolov3_darknet --video_file=/root/projects/videos/test.mp4 --device=GPU
-```
-Only `.mp4` videos are currently supported; the `visualized result` is saved as `output.mp4` in the current directory.
-
-`Example 3`:
-```shell
-# joint keypoint + detection prediction on GPU:
-# persons found by the detector are fed into the keypoint model
-./main --model_dir=/root/projects/models/yolov3_darknet --model_dir_keypoint=/root/projects/models/hrnet_w32_256x192 --image_file=/root/projects/images/test.jpeg --device=GPU
-```
-
-## Performance
-For benchmarks, see [BENCHMARK_INFER](../../BENCHMARK_INFER.md)
diff --git a/deploy/cpp/docs/windows_vs2019_build.md b/deploy/cpp/docs/windows_vs2019_build.md
deleted file mode 100644
index 9c5a2d33d..000000000
--- a/deploy/cpp/docs/windows_vs2019_build.md
+++ /dev/null
@@ -1,152 +0,0 @@
-# Visual Studio 2019 Community CMake Build Guide
-
-On Windows we tested with `Visual Studio 2019 Community`. Microsoft has supported managing `CMake` cross-platform builds directly since `Visual Studio 2017`, but stable and complete support only arrived with `2019`, so for CMake-managed builds we recommend `Visual Studio 2019`.
-
-
-## Prerequisites
-* Visual Studio 2019 (match the VS version used to build the Paddle inference library; see [binary compatibility across Visual Studio versions](https://docs.microsoft.com/zh-cn/cpp/porting/binary-compat-2015-2017?view=vs-2019))
-* CUDA 9.0 / CUDA 10.0, cudnn 7+ / TensorRT (only needed with the GPU version of the inference library)
-* CMake 3.0+ [CMake download](https://cmake.org/download/)
-
-**Important: the TensorRT versions required by the Windows inference libraries are:**
-
-| Inference library version | TensorRT version |
-| ---- | ---- |
-| cuda10.1_cudnn7.6_avx_mkl_trt6 | TensorRT-6.0.1.5 |
-| cuda10.2_cudnn7.6_avx_mkl_trt7 | TensorRT-7.0.0.11 |
-| cuda11.0_cudnn8.0_avx_mkl_trt7 | TensorRT-7.2.1.6 |
-
-Make sure the software above is installed; we use the `VS2019` Community edition.
-
-**All examples below use `D:\projects` as the working directory.**
-
-### Step1: Download the code
-
-Download the source:
-```shell
-git clone https://github.com/PaddlePaddle/PaddleDetection.git
-```
-
-**Note**: the `C++` inference code lives in the `PaddleDetection/deploy/cpp` directory, which does not depend on any other directory of `PaddleDetection`.
-
-
-### Step2: Download the PaddlePaddle C++ inference library paddle_inference
-
-The PaddlePaddle C++ inference library ships prebuilt packages for different `CPU` and `CUDA` versions; download the one matching your setup: [C++ inference library download list](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#windows)
-
-After extraction, the `D:\projects\paddle_inference` directory contains:
-```
-paddle_inference
-├── paddle # paddle core library and headers
-|
-├── third_party # third-party dependencies and headers
-|
-└── version.txt # version and build info
-```
-
-### Step3: Install and configure OpenCV
-
-1. Download OpenCV 3.4.6 for Windows from the official site, [download link](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)
-2. Run the downloaded executable and extract OpenCV to a directory, e.g. `D:\projects\opencv`
-3. Configure the environment variables as follows (skip this if you use absolute paths everywhere)
-    - My Computer -> Properties -> Advanced system settings -> Environment Variables
-    - Find Path under system variables (create it if missing) and double-click to edit
-    - Add the opencv path, e.g. `D:\projects\opencv\build\x64\vc14\bin`, and save
-
-### Step4: Compile
-
-1. Enter the `cpp` folder
-```
-cd D:\projects\PaddleDetection\deploy\cpp
-```
-
-2. Generate the project files with CMake
-
-The build parameters are described below (parameters marked `*` apply only with the **GPU version** of the inference library; keep the CUDA library versions aligned, **using CUDA 9.0 or 10.0, not 9.2, 10.1, etc.**):
-
-| Parameter | Meaning |
-| ---- | ---- |
-| *CUDA_LIB | CUDA library path |
-| *CUDNN_LIB | CUDNN library path |
-| OPENCV_DIR | OpenCV installation path |
-| PADDLE_DIR | Paddle inference library path |
-| PADDLE_LIB_NAME | Name of the Paddle inference library |
-
-**Note:** 1. with the `CPU` library, untick `WITH_GPU`; 2. with the `openblas` build, untick `WITH_MKL`; 3. untick `WITH_KEYPOINT` if keypoint models are not needed
-
-Run the following command to generate the project files:
-```
-cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=path_to_cuda_lib -DCUDNN_LIB=path_to_cudnn_lib -DPADDLE_DIR=path_to_paddle_lib -DPADDLE_LIB_NAME=paddle_inference -DOPENCV_DIR=path_to_opencv -DWITH_KEYPOINT=ON
-```
-
-For example:
-```
-cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=D:\projects\packages\cuda10_0\lib\x64 -DCUDNN_LIB=D:\projects\packages\cuda10_0\lib\x64 -DPADDLE_DIR=D:\projects\packages\paddle_inference -DPADDLE_LIB_NAME=paddle_inference -DOPENCV_DIR=D:\projects\packages\opencv3_4_6 -DWITH_KEYPOINT=ON
-```
-
-3. Build
-Open `PaddleObjectDetector.sln` under the `cpp` folder with `Visual Studio 16 2019`, set the build mode to `Release`, and click `Build` -> `Build All`.
-
-
-### Step5: Inference and visualization
-
-The executables produced by the `Visual Studio 2019` build are in the `out\build\x64-Release` directory; open `cmd` and switch to it:
-
-```
-cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
-```
-The executable `main` is the sample inference program; its main command-line parameters are:
-
-| Parameter | Description |
-| ---- | ---- |
-| --model_dir | path of the exported detection model |
-| --model_dir_keypoint | (Optional) path of the exported keypoint model |
-| --image_file | path of the image to predict |
-| --image_dir | path of the image directory to predict |
-| --video_file | path of the video to predict |
-| --camera_id | (Optional) camera ID used for prediction; default -1 (do not use a camera) |
-| --device | runtime device, one of `CPU/GPU/XPU`; default `CPU` |
-| --gpu_id | GPU device id used for inference (default 0) |
-| --run_mode | with GPU, default fluid; options: fluid/trt_fp32/trt_fp16/trt_int8 |
-| --batch_size | batch size for detection inference; effective when `image_dir` is given |
-| --batch_size_keypoint | batch size for keypoint inference; default 8 |
-| --run_benchmark | whether to repeat inference to benchmark speed |
-| --output_dir | folder for output images; default output |
-| --use_mkldnn | whether to enable MKLDNN acceleration for CPU inference |
-| --cpu_threads | number of CPU threads; default 1 |
-| --use_dark | whether to apply DarkPose post-processing to keypoint outputs; default true |
-
-**Notes**:
-(1) Priority order: `camera_id` > `video_file` > `image_dir` > `image_file`.
-(2) If `opencv_world346.dll` is reported missing, copy `opencv_world346.dll` from the `D:\projects\packages\opencv3_4_6\build\x64\vc14\bin` folder into the folder of `main.exe`.
-(3) If --run_benchmark is set to True, install the dependencies first: `pip install pynvml psutil GPUtil`.
-
-
-`Example 1`:
-```shell
-# predict image D:\\images\\test.jpeg without GPU
-.\main --model_dir=D:\\models\\yolov3_darknet --image_file=D:\\images\\test.jpeg
-```
-
-The `visualized result` for an image is saved as `output.jpg` in the current directory.
-
-
-`Example 2`:
-```shell
-# predict video D:\\videos\\test.mp4 with GPU
-.\main --model_dir=D:\\models\\yolov3_darknet --video_file=D:\\videos\\test.mp4 --device=GPU
-```
-
-Only `.mp4` videos are currently supported; the `visualized result` is saved as `output.mp4` in the current directory.
-
-
-`Example 3`:
-```shell
-# joint keypoint + detection prediction on GPU:
-# persons found by the detector are fed into the keypoint model
-.\main --model_dir=D:\\models\\yolov3_darknet --model_dir_keypoint=D:\\models\\hrnet_w32_256x192 --image_file=D:\\images\\test.jpeg --device=GPU
-```
-
-
-## Performance
-For benchmarks, see [BENCHMARK_INFER](../../BENCHMARK_INFER.md)

From 90579b864bddf4c9d860b558b7e4c125c137cf5d Mon Sep 17 00:00:00 2001
From: lzzyzlbb <287246233@qq.com>
Date: Wed, 8 Dec 2021 08:56:07 +0000
Subject: [PATCH 3/4] add c++ deploy for msvsr

---
 deploy/cpp/docs/linux_build.md | 6 +++---
 deploy/cpp/scripts/build.sh    | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/deploy/cpp/docs/linux_build.md b/deploy/cpp/docs/linux_build.md
index 6b8ed9a52..259da98a2 100755
--- a/deploy/cpp/docs/linux_build.md
+++ b/deploy/cpp/docs/linux_build.md
@@ -100,18 +100,18 @@ make
 After a successful build, the inference entry program is `build/main`; its main command-line parameters are:
 | Parameter | Description |
 | ---- | ---- |
-| --model_dir | path of the exported detection model |
+| --model_dir | path of the exported model |
+| --model_dir | path of the exported model's configuration file |
 | --video_file | path of the video to predict |
+| --frame_enum | video frame sequence for super resolution |
 | --device | runtime device, one of `CPU/GPU/XPU`; default `CPU` |
 | --gpu_id | GPU device id used for inference (default 0) |
 | --run_mode | with GPU, default fluid; options: fluid/trt_fp32/trt_fp16/trt_int8 |
 | --batch_size | batch size for detection inference; effective when `image_dir` is given |
 | --batch_size_keypoint | batch size for keypoint inference; default 8 |
-| --run_benchmark | whether to repeat inference to benchmark speed |
 | --output_dir | folder for output images; default output |
 | --use_mkldnn | whether to enable MKLDNN acceleration for CPU inference |
 | --cpu_threads | number of CPU threads; default 1 |
-| --use_dark | whether to apply DarkPose post-processing to keypoint outputs; default true |

 `Example 1`:
 ```shell
diff --git a/deploy/cpp/scripts/build.sh b/deploy/cpp/scripts/build.sh
index c9bc82bb4..9e1ccd307 100644
--- a/deploy/cpp/scripts/build.sh
+++ b/deploy/cpp/scripts/build.sh
@@ -17,13 +17,13 @@ TENSORRT_INC_DIR=/path/to/tensorrt/include
 TENSORRT_LIB_DIR=/path/to/tensorrt/lib
 
 # Path to the Paddle inference library
-PADDLE_DIR=../../../../paddle_inference
+PADDLE_DIR=/path/to/paddle_inference
 
 # CUDA lib path
-CUDA_LIB=/usr/local/cuda-9.0/targets/x86_64-linux/lib/
+CUDA_LIB=/path/to/cuda/lib
 
 # CUDNN lib path
-CUDNN_LIB=/usr/lib/x86_64-linux-gnu/
+CUDNN_LIB=/path/to/cudnn/lib
 
 WITH_VSR=ON

From 16cb4d4a0a17e07b7651fe40a9559dd383875ff7 Mon Sep 17 00:00:00 2001
From: lzzyzlbb <287246233@qq.com>
Date: Wed, 8 Dec 2021 09:03:49 +0000
Subject: [PATCH 4/4] add c++ deploy for msvsr

---
 deploy/cpp/README.md | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/deploy/cpp/README.md b/deploy/cpp/README.md
index ffa5e251e..904e622ef 100644
--- a/deploy/cpp/README.md
+++ b/deploy/cpp/README.md
@@ -4,8 +4,6 @@

 ## Per-environment build and deployment tutorials
 - [Linux build and deployment](docs/linux_build.md)
-- [Windows build and deployment (Visual Studio 2019)](docs/windows_vs2019_build.md)
-- [NV Jetson build and deployment](docs/Jetson_build.md)

 ## C++ deployment overview
@@ -16,13 +14,13 @@

 ### 1. Overview

-This directory provides a cross-platform `C++` deployment solution: after exporting a model trained with `PaddleDetection`, users can quickly run it with this project, or integrate the code into their own applications.
+This directory provides a cross-platform `C++` deployment solution: after exporting a model trained with `PaddleGAN`, users can quickly run it with this project, or integrate the code into their own applications.
 The main design goals are:
-- cross-platform: build, secondary development and deployment on both `Windows` and `Linux`
+- build, secondary development and deployment on `Linux`
 - extensibility: users can implement their own special preprocessing logic for new models
 - high performance: besides the speed of `PaddlePaddle` itself, key steps are optimized for the characteristics of image detection
-- support for various detection model architectures such as `Yolov3`/`Faster_RCNN`/`SSD`
+

 ### 2. Main directories and files

```
deploy/cpp
|
├── src
│   ├── main.cc # sample integration code, program entry
-│   ├── object_detector.cc # implementation of the model loading and inference wrapper class
+│   ├── vsr.cc # implementation of the model loading and inference wrapper class
│   └── preprocess_op.cc # implementation of the main preprocessing logic
|
├── include
│   ├── config_parser.h # parsing of the exported model's yaml config file
-│   ├── object_detector.h # model loading and inference wrapper class
+│   ├── vsr.h # model loading and inference wrapper class
│   └── preprocess_op.h # main preprocessing logic classes
|
├── docs
│   ├── linux_build.md # Linux build guide
-│   └── windows_vs2019_build.md # Windows VS2019 build guide
│
├── build.sh # build script
│
├── CMakeList.txt # cmake entry file
-|
-├── CMakeSettings.json # Visual Studio 2019 CMake project build settings
│
└── cmake # external cmake dependencies (currently only yaml-cpp)