C++ Implementation of Object Detection with Caffe-ssd


Author: Michael.Chen

Website: www.tgeek.tech


1. Building caffe-ssd

1.1 Download the caffe-ssd source code

Clone the ssd branch:

git clone -b ssd https://github.com/weiliu89/caffe.git caffe-ssd

1.2 Configure the build

Enter the caffe-ssd directory and edit Makefile.config:

cd caffe-ssd
cp Makefile.config.example Makefile.config

To build with cuDNN GPU acceleration, uncomment the USE_CUDNN := 1 line; to build for CPU only, uncomment the CPU_ONLY := 1 line instead:

## Refer to http://caffe.berkeleyvision.org/installation.html
# Contributions simplifying and improving our build system are welcome!

# cuDNN acceleration switch (uncomment to build with cuDNN).
USE_CUDNN := 1

# CPU-only switch (uncomment to build without GPU support).
# CPU_ONLY := 1

1.3 Build

mkdir build
cd build
cmake ..
make all -j
make install
make runtest
make pycaffe

That completes the caffe-ssd installation.

2. Implementing SSD Detection

I originally planned to build this as a .so shared library, but after compiling it, calls into the library failed inside GetMatVector() within OpenCV's merge(), and I could not resolve the error despite trying several approaches. If anyone has a solution, please let me know.

What follows is the complete implementation.

2.1 The Detector class: detection.hpp

i. Reading the configuration file

We again use OpenCV's FileStorage class to read the .xml configuration file:

    cv::FileStorage setting_fs("../param/param.xml", cv::FileStorage::READ);
    if(!setting_fs.isOpened()){
      std::cout<<"ERROR: Open config file failed"<<std::endl;
      exit(-1);
    }

The param.xml configuration file is written as shown below.

The parameters are:

device - use the GPU (1) or the CPU (0)

thresh - confidence threshold

mean_value - mean values of the pretrained model (use either this or mean_file)

mean_file - binaryproto mean file of the pretrained model (use either this or mean_value)

model_file - network structure (deploy) file

weights_file - pretrained model weights

name_file - label file

<?xml version="1.0"?>
<opencv_storage>

<!-- Configuration -->
<device>1</device>                 <!-- 0: CPU, 1: GPU -->
<thresh>0.5</thresh>               <!-- confidence threshold -->

<!-- SSD configuration files -->
<mean_value>127.5,127.5,127.5</mean_value>
<mean_file></mean_file>
<model_file>../../dnn_nets/ssd300/deploy.prototxt</model_file>
<weights_file>../../dnn_nets/ssd300/VGG_VOC0712_SSD_300x300_iter_120000.caffemodel</weights_file>
<name_file>../../dnn_nets/ssd300/ssd.names</name_file>
</opencv_storage>

Read the parameters into the program:

    setting_fs["device"] >> device_type;
    setting_fs["thresh"] >> thresh;
    setting_fs["model_file"] >> model_file;
    setting_fs["weights_file"] >> weights_file;
    setting_fs["mean_file"] >> mean_file;
    setting_fs["mean_value"] >> mean_value;
    setting_fs["name_file"] >> name_file;

ii. Network initialization

ii.a Set the device
if (device_type)
      caffe::Caffe::set_mode(caffe::Caffe::GPU);
else
      caffe::Caffe::set_mode(caffe::Caffe::CPU);
ii.b Load the network files
    // Load the network
    net_.reset(new caffe::Net<float>(model_file, caffe::TEST));
    net_->CopyTrainedLayersFrom(weights_file);
    if(net_->num_inputs()!=1) 
      std::cout<<"ERROR: Network should have exactly one input."<<std::endl;

    caffe::Blob<float> *input_layer = net_->input_blobs()[0];
    num_channels_ = input_layer->channels();
    if(num_channels_ != 3 && num_channels_ != 1)
      std::cout<< "ERROR: Input layer should have 1 or 3 channels."<<std::endl;

    input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
ii.c Compute the network mean
    // Load the binaryproto mean file. 
    SetMean(mean_file, mean_value);
ii.d Read the label file
    std::ifstream ifs(name_file.c_str());
    if(!ifs.is_open()){
      std::cout<<"ERROR: Cannot find labels file in \""<<name_file<<"\""<<std::endl;
    }
    std::string line;
    while (std::getline(ifs, line)) labels.push_back(line);
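
Note that labels is later indexed directly with the predicted label id (labels[int(d[1])]), so the first line of the names file must correspond to label 0. For the VGG_VOC0712 model that is the background class; a sketch of ssd.names, assuming the standard VOC labelmap order, looks like this:

background
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor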

iii. Public functions and variables

iii.a Prediction function
  // Get output results
  void GetResult(cv::Mat frame);
iii.b Result drawing function
  // Draw outputs on the image
  void DrawResult(cv::Mat& frame, std::vector<std::string> Class_names,
                  std::vector<cv::Rect> Boxes, std::vector<float> Confidences,
                  std::vector<cv::Point> Centers);
iii.c Prediction outputs

Boxes - predicted bounding boxes of the detected objects

Class_names - predicted classes of the detected objects

Confidences - predicted confidences of the detected objects

Centers - predicted center points of the detected objects

  std::vector<cv::Rect> Boxes;          // bounding boxes of detected objects
  std::vector<std::string> Class_names; // classes of detected objects
  std::vector<float> Confidences;       // confidences of detected objects
  std::vector<cv::Point> Centers;       // center position of detected objects

iv. Private functions and variables

iv.a Mean computation function
  // Load mean value
  void SetMean(const std::string &mean_file, const std::string &mean_value);
iv.b Input layer wrapping function
  // Wrap the input layer of the network in separate cv::Mat objects
  void WrapInputLayer(std::vector<cv::Mat>* input_channels);
iv.c Input image preprocessing function
  // Convert the input image to the input image format of the network.
  void Preprocess(const cv::Mat& img,std::vector<cv::Mat>* input_channels);
iv.d Detection function
  // Get outputs of output layers of network
  std::vector<std::vector<float> > Detect(const cv::Mat& img);
iv.e Private variables for the network
  caffe::shared_ptr<caffe::Net<float> > net_;    // Caffe network
  cv::Size input_geometry_;               // input size
  int num_channels_;                      // number of input channels
  cv::Mat mean_;                          // mean value
iv.f Private variables read from the configuration
  std::string name_file;                  // label file
  int device_type;                        // device type: cpu/gpu
  std::string model_file;                 // network structure file
  std::string weights_file;               // pretrained model file
  std::string mean_file;                  // binary mean file
  std::string mean_value;                 // mean value
  std::vector<std::string> labels;        // classes
  float thresh;                           // confidence threshold
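
Putting the pieces above together, detection.hpp looks roughly like the following sketch. The constructor, which reads param.xml and performs the initialization described in sections i and ii, is an assumption about code not shown above.

// detection.hpp -- assembled sketch of the Detector class described in this section
#pragma once

#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <caffe/caffe.hpp>

class Detector {
public:
  Detector();                           // assumed: reads param.xml and loads the network
  // Get output results
  void GetResult(cv::Mat frame);
  // Draw outputs on the image
  void DrawResult(cv::Mat& frame, std::vector<std::string> Class_names,
                  std::vector<cv::Rect> Boxes, std::vector<float> Confidences,
                  std::vector<cv::Point> Centers);

  std::vector<cv::Rect> Boxes;          // bounding boxes of detected objects
  std::vector<std::string> Class_names; // classes of detected objects
  std::vector<float> Confidences;       // confidences of detected objects
  std::vector<cv::Point> Centers;       // center positions of detected objects

private:
  void SetMean(const std::string& mean_file, const std::string& mean_value);
  void WrapInputLayer(std::vector<cv::Mat>* input_channels);
  void Preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels);
  std::vector<std::vector<float> > Detect(const cv::Mat& img);

  caffe::shared_ptr<caffe::Net<float> > net_;   // Caffe network
  cv::Size input_geometry_;                     // input size
  int num_channels_;                            // number of input channels
  cv::Mat mean_;                                // mean image
  std::string name_file;                        // label file
  int device_type;                              // device type: 0 = CPU, 1 = GPU
  std::string model_file;                       // network structure file
  std::string weights_file;                     // pretrained model file
  std::string mean_file;                        // binary mean file
  std::string mean_value;                       // mean value
  std::vector<std::string> labels;              // classes
  float thresh;                                 // confidence threshold
};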

2.2 Function implementations: detection.cpp

i. Mean computation function Detector::SetMean

This simply builds the mean Mat according to the shape of the input layer.

For example, suppose the mean read from the configuration is (0, 127.5, 127.5) and the input layer is 3x300x300, i.e. three channels of 300×300.

The function then creates three 300×300 Mats (filled with 0, 127.5 and 127.5 respectively), stores them in a vector, and merges them with OpenCV's merge() into a single Mat, mean_, which is the output of the function.
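
For the mean_value branch this is equivalent to the following minimal sketch (assuming a 3-channel 300×300 input layer):

// Hypothetical shortcut: fill the 3-channel mean image directly from a cv::Scalar
// instead of merging three single-channel Mats.
mean_ = cv::Mat(input_geometry_, CV_32FC3, cv::Scalar(0.0, 127.5, 127.5));

The full implementation, handling both the mean_file and mean_value cases, is: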

/****************************************************************************** 
    Function:       Detector::SetMean
    Description:    Load the mean from a binaryproto file or from given mean values
    Input:          const std::string& mean_file        -binary file of mean 
                    const std::string& mean_value       -value of mean (1 or 3 channels)
    Output:         cv::Mat mean_                       -the mean image
    Return:         void
****************************************************************************/
void Detector::SetMean(const std::string& mean_file, const std::string& mean_value) {
  cv::Scalar channel_mean;
  // load mean file
  if (!mean_file.empty()) {
    CHECK(mean_value.empty()) <<
      "Cannot specify mean_file and mean_value at the same time";
    caffe::BlobProto blob_proto;
    ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);

    // Convert from BlobProto to Blob<float> 
    caffe::Blob<float> mean_blob;
    mean_blob.FromProto(blob_proto);
    CHECK_EQ(mean_blob.channels(), num_channels_)
      << "Number of channels of mean file doesn't match input layer.";

    // The format of the mean file is planar 32-bit float BGR or grayscale.
    std::vector<cv::Mat> channels;
    float* data = mean_blob.mutable_cpu_data();
    for (int i = 0; i < num_channels_; ++i) {
      // Extract an individual channel. 
      cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
      channels.push_back(channel);
      data += mean_blob.height() * mean_blob.width();
    }

    // Merge the separate channels into a single image.
    cv::Mat mean;
    cv::merge(channels, mean);

    // Compute the global mean pixel value and create a mean image
    // filled with this value. 
    channel_mean = cv::mean(mean);
    mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
  }
  // use mean value
  if (!mean_value.empty()) {
    CHECK(mean_file.empty()) <<
      "Cannot specify mean_file and mean_value at the same time";
    std::stringstream ss(mean_value);
    std::vector<float> values;
    std::string item;
    while (getline(ss, item, ',')) {
      float value = std::atof(item.c_str());
      values.push_back(value);
    }
    CHECK(values.size() == 1 || values.size() == num_channels_) <<
      "Specify either 1 mean_value or as many as channels: " << num_channels_;

    std::vector<cv::Mat> channels;
    for (int i = 0; i < num_channels_; ++i) {
      /* Extract an individual channel. */
      cv::Mat channel(input_geometry_.height, input_geometry_.width, CV_32FC1,
          cv::Scalar(values[i]));
      channels.push_back(channel);
    }
    cv::merge(channels, mean_);
  }
}

ii. Input layer wrapping function Detector::WrapInputLayer

The cv::Mat headers created here point directly at the input blob's CPU buffer, so when Preprocess later writes the separate channels into them, the data goes straight into the network's input layer.

/****************************************************************************** 
    Function:       Detector::WrapInputLayer
    Description:    Wrap the input layer of the network in separate cv::Mat objects,
                    The last preprocessing operation will write the separate channels 
                    directly to the input layer
    Input:          std::vector<cv::Mat>* input_channels      
    Output:         std::vector<cv::Mat>* input_channels      -input layer
    Return:         void
****************************************************************************/
void Detector::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
  caffe::Blob<float>* input_layer = net_->input_blobs()[0];

  int width = input_layer->width();
  int height = input_layer->height();
  float* input_data = input_layer->mutable_cpu_data();
  for (int i = 0; i < input_layer->channels(); ++i) {
    cv::Mat channel(height, width, CV_32FC1, input_data);
    input_channels->push_back(channel);
    input_data += width * height;
  }
}

iii. Input image preprocessing function Detector::Preprocess

The frame is converted to the network's channel count, resized to input_geometry_, converted to float, mean-subtracted, and finally split into the wrapped input channel Mats.

/****************************************************************************** 
    Function:       Detector::Preprocess
    Description:    Convert the input image to the input image format of the network
    Input:          const cv::Mat& img                        -image need to be detected
                    std::vector<cv::Mat>* input_channels      -input layer
    Output:         std::vector<cv::Mat>* input_channels      -filled input channels
    Return:         void
****************************************************************************/
void Detector::Preprocess(const cv::Mat& img,
                            std::vector<cv::Mat>* input_channels) {
  cv::Mat sample;
  if (img.channels() == 3 && num_channels_ == 1)
    cv::cvtColor(img, sample, cv::COLOR_BGR2GRAY);
  else if (img.channels() == 4 && num_channels_ == 1)
    cv::cvtColor(img, sample, cv::COLOR_BGRA2GRAY);
  else if (img.channels() == 4 && num_channels_ == 3)
    cv::cvtColor(img, sample, cv::COLOR_BGRA2BGR);
  else if (img.channels() == 1 && num_channels_ == 3)
    cv::cvtColor(img, sample, cv::COLOR_GRAY2BGR);
  else
    sample = img;

  cv::Mat sample_resized;
  if (sample.size() != input_geometry_)
    cv::resize(sample, sample_resized, input_geometry_);
  else
    sample_resized = sample;

  cv::Mat sample_float;
  if (num_channels_ == 3)
    sample_resized.convertTo(sample_float, CV_32FC3);
  else
    sample_resized.convertTo(sample_float, CV_32FC1);

  cv::Mat sample_normalized;
  cv::subtract(sample_float, mean_, sample_normalized);

  /* This operation will write the separate BGR planes directly to the
   * input layer of the network because it is wrapped by the cv::Mat
   * objects in input_channels. */
  cv::split(sample_normalized, *input_channels);

  CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
        == net_->input_blobs()[0]->cpu_data())
    << "Input channels are not wrapping the input layer of the network.";
  }

iv. Detection function Detector::Detect

The SSD DetectionOutput layer produces a blob of shape 1x1xNx7; each group of 7 values is one detection, and rows whose first value (image_id) is -1 are invalid and skipped.

/****************************************************************************** 
    Function:       Detector::Detect
    Description:    Get the outputs of output layers of network
    Input:          cv::Mat &img                -the image need to be detected
    Output:         std::vector<std::vector<float>> detections     -the outputs of output layers
    Return:         std::vector<std::vector<float>> detections 
    ****************************************************************************/
std::vector<std::vector<float>> Detector::Detect(const cv::Mat &img)
{
  std::vector<std::vector<float>> detections;
  caffe::Blob<float> *input_layer = net_->input_blobs()[0];

  input_layer->Reshape(1, num_channels_,input_geometry_.height, input_geometry_.width);
  /* Forward dimension change to all layers. */
  net_->Reshape();

  // Wrap the input layer of the network in separate cv::Mat objects
  std::vector<cv::Mat> input_channels;
  WrapInputLayer(&input_channels);

  // Convert the input image to the input image format of the network.
  Preprocess(img, &input_channels);
  // run forward progress
  net_->Forward();

  // Copy the output layer to a std::vector
  caffe::Blob<float> *result_blob = net_->output_blobs()[0];
  const float *result = result_blob->cpu_data();
  const int num_det = result_blob->height();
  for (int k = 0; k < num_det; ++k)
  {
    if (result[0] == -1)
    {
      // Skip invalid detection.
      result += 7;
      continue;
    }
    std::vector<float> detection(result, result + 7);
    detections.push_back(detection);
    result += 7;
  }
  return detections;
}

v. Prediction function Detector::GetResult

Each detection row has the format [image_id, label, score, xmin, ymin, xmax, ymax], with the coordinates normalized to [0, 1], so they are scaled by the frame width and height here. For example, on a 1280×720 frame a detection with xmin = 0.25, ymin = 0.4, xmax = 0.75, ymax = 0.9 becomes the rectangle from (320, 288) to (960, 648), with center (640, 468).

  /****************************************************************************** 
    Function:       Detector::GetResult
    Description:    Run prediction
    Input:          cv::Mat frame                               -image need to be detected
    Output:         std::vector<cv::Rect> Boxes;                -bounding boxes of detected objects
                    std::vector<std::string> Class_names;       -classes of detected objects
                    std::vector<float> Confidences;             -confidences of detected objects
                    std::vector<cv::Point> Centers;             -center position of detected objects
    Return:         void
****************************************************************************/
  void Detector::GetResult(cv::Mat frame)
  {
    // clear outputs
    Class_names.clear();
    Confidences.clear();
    Boxes.clear();
    Centers.clear();

    std::vector<std::vector<float>> detections = Detect(frame);
    for (int i = 0; i < detections.size(); ++i)
    {
      std::vector<float> &d = detections[i];
      // Detection format: [image_id, label, score, xmin, ymin, xmax, ymax].
      const float score = d[2];
      if (score >= thresh)
      {
        Class_names.push_back(labels[int(d[1])]);
        Confidences.push_back(score);
        Boxes.push_back(cv::Rect(int(d[3] * frame.cols), int(d[4] * frame.rows),
                                int((d[5] - d[3]) * frame.cols), int((d[6] - d[4]) * frame.rows)));
        Centers.push_back(cv::Point(int(((d[5] + d[3]) * frame.cols)/2),int(((d[6] + d[4]) * frame.rows)/2)));
      }
    }
  }

vi. Result drawing function Detector::DrawResult

/****************************************************************************** 
    Function:       Detector::DrawResult
    Description:    Draw the detection results on the image
    Input:          cv::Mat frame                               -image need to draw
                    std::vector<cv::Rect> Boxes;                -bounding boxes of detected objects
                    std::vector<std::string> Class_names;       -classes of detected objects
                    std::vector<float> Confidences;             -confidences of detected objects
                    std::vector<cv::Point> Centers;             -center position of detected objects
    Output:         cv::Mat frame                               -image with the results drawn
    Return:         void
****************************************************************************/
  void Detector::DrawResult(cv::Mat &frame, std::vector<std::string> Class_names,
                            std::vector<cv::Rect> Boxes, std::vector<float> Confidences, std::vector<cv::Point> Centers)
  {
    for(int n=0; n<Class_names.size(); n++){
      cv::rectangle(frame,Boxes[n],cv::Scalar(255, 178, 50), 3);
      std::string label = cv::format("%.2f%%", 100*Confidences[n]);
      label = Class_names[n] + ":" + label;
      //Display the label at the top of the bounding box
      int baseLine;
      cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
      int top = std::max(Boxes[n].y, labelSize.height);
      int left = Boxes[n].x;
      rectangle(frame, cv::Point(left, top - round(1.5 * labelSize.height)), cv::Point(left + round(1.5 * labelSize.width), top + baseLine), cv::Scalar(255, 255, 255), cv::FILLED);
      putText(frame, label, cv::Point(left, top), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 0, 0), 1);
      putText(frame, cv::format("* (%d,%d)", Centers[n].x, Centers[n].y), Centers[n], cv::FONT_HERSHEY_SIMPLEX, 0.3, cv::Scalar(0, 0, 255), 1);
    }
  }

2.3 Main function main.cpp

Note that the configuration file (../param/param.xml) and the test video (../test.mp4) are opened with paths relative to the directory from which the program is run.

#include "detection.hpp"

int main(int argc, char **argv)
{
    Detector detector;

    cv::VideoCapture capture;
    capture.open("../test.mp4");
    if (capture.isOpened())
    {
        std::cout<<"INFO: Video file loaded successfully"<<std::endl;
    }
    else std::cout<<"ERROR: Failed to load video file"<<std::endl;
    cv::Mat frame,out_image;
    while(capture.read(frame))
    {
        frame.copyTo(out_image);
        detector.GetResult(out_image);
        detector.DrawResult(out_image,detector.Class_names,detector.Boxes,detector.Confidences,detector.Centers);
        cv::imshow("demo", out_image);
        cv::waitKey(5);
    }
    return 0;
}

2.4 CMakeLists.txt

Add the caffe include directory to include_directories and the caffe-ssd shared library to target_link_libraries.

cmake_minimum_required (VERSION 2.8)

# project name
project (caffedetector)
# using C++11 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread")

# if more than one OpenCV version is installed, set the build path of the one to use
# set(OpenCV_DIR "/home/user/opencv-4.1.0/build")
# find opencv
find_package(OpenCV REQUIRED)
# print message of opencv
message(STATUS "OpenCV version: ${OpenCV_VERSION}")
message(STATUS "OpenCV include path: ${OpenCV_INCLUDE_DIRS}")
message(STATUS "    libraries: ${OpenCV_LIBS}")

include_directories ( 
    ${OpenCV_INCLUDE_DIRS}
    /home/user/caffe-ssd/include        # caffe include directory
    ${CMAKE_CURRENT_SOURCE_DIR}/include
    /usr/local/cuda-9.0/include   
    /usr/local/include
    /usr/local/cuda/include
    /usr/include)

aux_source_directory(./src/ DIR_SRCS)
add_executable(caffedetector ${DIR_SRCS})

target_link_libraries(caffedetector
    ${OpenCV_LIBS}
    /home/user/caffe-ssd/build/lib/libcaffe.so      # caffe library
    /usr/local/lib/libglog.so.0
    /usr/local/lib/libboost_system.so.1.66.0
)