Action Recognition Based on OpenPose


Here, data collection uses the OpenPose C++ API, training uses the PyTorch Python API, and recognition combines the OpenPose Python API.

OpenPose Installation

Build the C++ and Python APIs; CUDA, Caffe, and OpenCV are required.
Follow OpenPose_installation.
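
After the build, a quick way to confirm that the Python bindings were produced is to try importing pyopenpose. The path below is an assumption (the default `make install` location on Ubuntu); point it at your own build or install directory:

import sys
sys.path.append('/usr/local/python')       # assumed install path; adjust to your build
from openpose import pyopenpose as op      # ImportError here means BUILD_PYTHON was off
print("pyopenpose imported successfully")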

PyTorch Installation

Pytorch
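
After installation, a quick sanity check (nothing project-specific assumed) confirms that PyTorch is importable and whether CUDA is visible:

import torch
print(torch.__version__)            # installed PyTorch version
print(torch.cuda.is_available())    # True if a CUDA-capable GPU can be used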

Action Data Collection

CMakeLists.txt (change {YOUR_PATH} to your own directory):

cmake_minimum_required (VERSION 2.8)

# project name
project (body_points)
# using C++11 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread")

# find opencv
find_package(OpenCV REQUIRED)
# print message of opencv
message(STATUS "OpenCV version: ${OpenCV_VERSION}")
message(STATUS "OpenCV include path: ${OpenCV_INCLUDE_DIRS}")
message(STATUS "libraries: ${OpenCV_LIBS}")

aux_source_directory(./src/ DIR_SRCS)

add_executable(body_points  ${DIR_SRCS})

include_directories(
    openpose-master/include
    ${CMAKE_CURRENT_SOURCE_DIR}/include
    /usr/local/include
    /usr/include
    ${OpenCV_INCLUDE_DIRS})

target_link_libraries(body_points
    ${OpenCV_LIBS}
    /usr/local/lib/libopenpose.so
    {YOUR_PATH}/caffe-master/build/lib/libcaffe.so
    /usr/lib/x86_64-linux-gnu/libgflags.so.2)

The collection code follows.
Note: change the string variable model_path to the location of the OpenPose pretrained models; the default is {OpenPose path}/openpose-master/models.

#include <fstream>
#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
#include <openpose/flags.hpp>
// OpenPose dependencies
#include <openpose/headers.hpp>

void configureWrapper(op::Wrapper &opWrapper)
{
    try
    {
        // Configuring OpenPose

        // logging_level
        op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
                  __LINE__, __FUNCTION__, __FILE__);
        op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
        op::Profiler::setDefaultX(FLAGS_profile_speed);

        // Applying user defined configuration - GFlags to program variables
        // outputSize
        const auto outputSize = op::flagsToPoint(FLAGS_output_resolution, "-1x-1");
        // netInputSize
        const auto netInputSize = op::flagsToPoint(FLAGS_net_resolution, "-1x368");
        // faceNetInputSize
        const auto faceNetInputSize = op::flagsToPoint(FLAGS_face_net_resolution, "368x368 (multiples of 16)");
        // handNetInputSize
        const auto handNetInputSize = op::flagsToPoint(FLAGS_hand_net_resolution, "368x368 (multiples of 16)");
        // poseMode
        const auto poseMode = op::flagsToPoseMode(FLAGS_body);
        // poseModel
        const auto poseModel = op::flagsToPoseModel(FLAGS_model_pose);
        // JSON saving
        if (!FLAGS_write_keypoint.empty())
            op::log("Flag `write_keypoint` is deprecated and will eventually be removed."
                    " Please, use `write_json` instead.",
                    op::Priority::Max);
        // keypointScaleMode
        const auto keypointScaleMode = op::flagsToScaleMode(FLAGS_keypoint_scale);
        // heatmaps to add
        const auto heatMapTypes = op::flagsToHeatMaps(FLAGS_heatmaps_add_parts, FLAGS_heatmaps_add_bkg,
                                                      FLAGS_heatmaps_add_PAFs);
        const auto heatMapScaleMode = op::flagsToHeatMapScaleMode(FLAGS_heatmaps_scale);
        // >1 camera view?
        const auto multipleView = (FLAGS_3d || FLAGS_3d_views > 1);
        // Face and hand detectors
        const auto faceDetector = op::flagsToDetector(FLAGS_face_detector);
        const auto handDetector = op::flagsToDetector(FLAGS_hand_detector);
        // Enabling Google Logging
        const bool enableGoogleLogging = true;
        const std::string model_path = "../models";
        // Pose configuration (use WrapperStructPose{} for default and recommended configuration)
        const op::WrapperStructPose wrapperStructPose{
            poseMode, netInputSize, outputSize, keypointScaleMode, FLAGS_num_gpu, FLAGS_num_gpu_start,
            FLAGS_scale_number, (float)FLAGS_scale_gap, op::flagsToRenderMode(FLAGS_render_pose, multipleView),
            poseModel, !FLAGS_disable_blending, (float)FLAGS_alpha_pose, (float)FLAGS_alpha_heatmap,
            FLAGS_part_to_show, model_path, heatMapTypes, heatMapScaleMode, FLAGS_part_candidates,
            (float)FLAGS_render_threshold, FLAGS_number_people_max, FLAGS_maximize_positives, FLAGS_fps_max,
            FLAGS_prototxt_path, FLAGS_caffemodel_path, (float)FLAGS_upsampling_ratio, enableGoogleLogging};
        opWrapper.configure(wrapperStructPose);
        // Face configuration (use op::WrapperStructFace{} to disable it)
        const op::WrapperStructFace wrapperStructFace{
            FLAGS_face, faceDetector, faceNetInputSize,
            op::flagsToRenderMode(FLAGS_face_render, multipleView, FLAGS_render_pose),
            (float)FLAGS_face_alpha_pose, (float)FLAGS_face_alpha_heatmap, (float)FLAGS_face_render_threshold};
        opWrapper.configure(wrapperStructFace);
        // Hand configuration (use op::WrapperStructHand{} to disable it)
        const op::WrapperStructHand wrapperStructHand{
            FLAGS_hand, handDetector, handNetInputSize, FLAGS_hand_scale_number, (float)FLAGS_hand_scale_range,
            op::flagsToRenderMode(FLAGS_hand_render, multipleView, FLAGS_render_pose), (float)FLAGS_hand_alpha_pose,
            (float)FLAGS_hand_alpha_heatmap, (float)FLAGS_hand_render_threshold};
        opWrapper.configure(wrapperStructHand);
        // Extra functionality configuration (use op::WrapperStructExtra{} to disable it)
        const op::WrapperStructExtra wrapperStructExtra{
            FLAGS_3d, FLAGS_3d_min_views, FLAGS_identification, FLAGS_tracking, FLAGS_ik_threads};
        opWrapper.configure(wrapperStructExtra);
        // Output (comment or use default argument to disable any output)
        const op::WrapperStructOutput wrapperStructOutput{
            FLAGS_cli_verbose, FLAGS_write_keypoint, op::stringToDataFormat(FLAGS_write_keypoint_format),
            FLAGS_write_json, FLAGS_write_coco_json, FLAGS_write_coco_json_variants, FLAGS_write_coco_json_variant,
            FLAGS_write_images, FLAGS_write_images_format, FLAGS_write_video, FLAGS_write_video_fps,
            FLAGS_write_video_with_audio, FLAGS_write_heatmaps, FLAGS_write_heatmaps_format, FLAGS_write_video_3d,
            FLAGS_write_video_adam, FLAGS_write_bvh, FLAGS_udp_host, FLAGS_udp_port};
        opWrapper.configure(wrapperStructOutput);
        // No GUI. Equivalent to: opWrapper.configure(op::WrapperStructGui{});
        // Set to single-thread (for sequential processing and/or debugging and/or reducing latency)
        if (FLAGS_disable_multi_thread)
            opWrapper.disableMultiThreading();
    }
    catch (const std::exception &e)
    {
        op::error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}

int bodydetector()
{
    try
    {
        op::log("Starting OpenPose demo...", op::Priority::High);
        const auto opTimer = op::getTimerInit();

        // Configuring OpenPose
        op::log("Configuring OpenPose...", op::Priority::High);
        op::Wrapper opWrapper{op::ThreadManagerMode::Asynchronous};
        configureWrapper(opWrapper);

        // Starting OpenPose
        op::log("Starting thread(s)...", op::Priority::High);
        opWrapper.start();

        // Process and display image
        cv::VideoCapture cap(0);
        cv::Mat frame;
        std::ofstream outfile;
        outfile.open("../output/data.dat");
        while (1)
        {
            cap >> frame;
            auto datumProcessed = opWrapper.emplaceAndPop(frame);

            if (datumProcessed != nullptr && !datumProcessed->empty())
            {
                // Display image
                cv::imshow("view", datumProcessed->at(0)->cvOutputData);
                char key = cv::waitKey(10);
                if (key == 's')
                {
                    std::cout << "Body keypoints: " + datumProcessed->at(0)->poseKeypoints.toString();
                    outfile << "Body keypoints: " + datumProcessed->at(0)->poseKeypoints.toString() << std::endl;
                }
            }

            else
                op::log("Image could not be processed.", op::Priority::High);
        }

        return 0;
    }
    catch (const std::exception &e)
    {
        return -1;
    }
}

int main(int argc, char *argv[])
{
    // Parsing command line flags
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    // Running tutorialApiCpp
    return bodydetector();
}

Press the s key on the keyboard and the skeleton keypoints of the current frame are written to ../output/data.dat in the following format:

Body keypoints: Array<T>::toString():
366.310272 44.607922 0.946063 
367.479401 66.582191 0.904820 
346.698822 65.372032 0.860462 
316.131439 69.010048 0.823414 
336.957123 50.649994 0.840219 
387.169250 67.789780 0.854524 
413.986145 73.951477 0.874534 
395.623474 54.279297 0.916995 
363.846436 131.427734 0.831362 
346.787720 131.386475 0.857533 
344.297028 173.014084 0.834007 
338.232971 210.924683 0.899703 
377.336334 132.610626 0.842341 
377.362244 176.651184 0.836939 
376.140198 210.897263 0.960210 
362.641144 43.259453 0.887264 
371.205170 43.285103 0.882027 
357.705627 44.538513 0.412742 
379.774353 44.584492 0.807628 
385.833557 220.698105 0.903382 
387.113525 219.438766 0.930573 
372.439972 213.360947 0.782960 
332.034821 219.544769 0.855795 
328.331482 218.262589 0.907401 
344.269287 215.801956 0.766095 
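
Each of the 25 rows is the x coordinate, y coordinate, and detection confidence of one keypoint. As a reading aid, the sketch below (not part of the pipeline; the joint names follow the BODY_25 ordering in the OpenPose output documentation) labels the rows of a parsed block:

# Helper (illustrative only): label the 25 rows of one block with BODY_25 joint names.
import numpy as np

BODY_25 = ["Nose", "Neck", "RShoulder", "RElbow", "RWrist",
           "LShoulder", "LElbow", "LWrist", "MidHip",
           "RHip", "RKnee", "RAnkle", "LHip", "LKnee", "LAnkle",
           "REye", "LEye", "REar", "LEar",
           "LBigToe", "LSmallToe", "LHeel", "RBigToe", "RSmallToe", "RHeel"]

def print_block(block):
    """block: a (25, 3) array of [x, y, confidence] rows parsed from data.dat."""
    block = np.asarray(block)
    for name, (x, y, c) in zip(BODY_25, block):
        print("%-10s x=%8.2f  y=%8.2f  conf=%.3f" % (name, x, y, c))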

Training the Model

PyTorch and NumPy are used, plus tensorboardX for visualization (optional).

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tensorboardX import SummaryWriter

Data Processing

Reading the Data

Read the file and store each frame as a 1×25×3 nested list, so the full array has shape (n, 1, 25, 3).

def readdatas(file):
    print("Read file %s as input"%file)
    with open(file) as fr:
        lines = fr.readlines()
    data_x = []
    for n in range(len(lines)):
        if lines[n][0] == 'B':              # a "Body keypoints:" header starts a block
            data_joint = []
            for i in range(1,26):           # the following 25 lines hold one keypoint each
                datas = lines[n+i].strip(' \n').split(' ')
                for j in range(3):          # x, y, confidence
                    datas[j] = float(datas[j])
                data_joint.append(datas)
            data_x.append([data_joint])     # wrap to get a (1, 25, 3) sample

    data_x = np.array(data_x)

    print("Data size: ",data_x.shape)
    return data_x

data_hand = readdatas("handsup.dat")
data_run = readdatas("run.dat")

Data Preparation

First, generate labels for the data and randomly split off test samples; np.append is then used to concatenate the label arrays directly (the test labels are built the same way).

def generate_label(length,type):
    label = np.empty([length],dtype=int)
    for n in range(label.shape[0]):
        label[n] = type
    return  label

def generate_test(data,length):
    import random
    data_test = np.empty([length,1,25,3],dtype=np.float64)
    for i in range(length):
        rand = random.randint(0,data.shape[0]-1)
        data_test[i] = data[rand]
        data = np.delete(data,rand,0)
    return data_test,data

hand_test,hand_train = generate_test(data_hand,100)
run_test,run_train = generate_test(data_run,400)
print("Training Input Data Size: ",hand_train.shape,run_train.shape)
print("Test Input Data Size: ",hand_test.shape,run_test.shape)
label_hand = generate_label(hand_train.shape[0],1)
label_run = generate_label(run_train.shape[0],0)
label_train = torch.from_numpy(np.append(label_hand,label_run))
label_test = torch.from_numpy(np.append(generate_label(hand_test.shape[0],1),
                                        generate_label(run_test.shape[0],0)))

Concatenate the training data and the test data; since these are multi-dimensional arrays, np.vstack is used to stack them vertically.


data_train = torch.from_numpy(np.vstack((hand_train,run_train)))
data_test = torch.from_numpy(np.vstack((hand_test,run_test)))
print("Total Input Training and Testing Size: ",data_train.shape,data_test.shape)

Reshape the data into [n, 1, 3, 25] tensors, wrap the training and test sets in PyTorch's TensorDataset, and read them with DataLoader. Note that shuffle is enabled here: samples of the same class are contiguous when read from file, so the order is shuffled at load time, and training runs in mini-batches of 20 samples each (a small sanity check follows the code below).

data_train = data_train.reshape(data_train.shape[0],1,3,25)
data_test = data_test.reshape(data_test.shape[0],1,3,25)

print("Total Input Training and Tesing Size: ",data_train.shape,data_test.shape)
train_dataset = TensorDataset(data_train, label_train)
train_loader = DataLoader(dataset=train_dataset,batch_size=20,shuffle=True)

test_dataset = TensorDataset(data_test, label_test)
test_loader = DataLoader(dataset=test_dataset,batch_size=data_test.shape[0],shuffle=True)
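
As a quick sanity check (purely illustrative, using only the arrays built above), one batch can be pulled from the loader to confirm the expected shapes:

# Pull one mini-batch and confirm shapes: inputs [20, 1, 3, 25], labels [20].
inputs, labels = next(iter(train_loader))
print(inputs.shape, inputs.dtype)   # torch.Size([20, 1, 3, 25]) torch.float64
print(labels.shape, labels.dtype)   # torch.Size([20]) and an integer dtype (int64 on Linux)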

Network Architecture

A very simple LeNet-style network is used:

class Action_Net(nn.Module):
    def __init__(self):
        super(Action_Net, self).__init__()
        self.conv1 = nn.Sequential(  # input shape (1, 3, 25)
            nn.Conv2d(
                in_channels=1,      # input height
                out_channels=16,    # n_filters
                kernel_size=(2,6),  # filter size
                stride=1,           # filter movement/step
                padding=1,          # padding=(kernel_size-1)/2 height and width don't change (3+2,25+2)
            ),  # output shape (16,5-2+1,27-6+1) = (16,4,22)
            nn.ReLU(),              # activation
            nn.MaxPool2d(kernel_size=(2,2)),  # downsample with a 2x2 window, output shape (16, 2, 11)
        )
        self.conv2 = nn.Sequential(         # input shape (16, 2, 11)
            nn.Conv2d(
                in_channels=16,  # input height
                out_channels=32,  # n_filters
                kernel_size= (1,4),  # filter size
                stride=1,  # filter movement/step
                padding=1,  # padding=(kernel_size-1)/2 height and width don't change (2+2,11+2)
            ), # output shape (32,  13-4+1, 4-1+1) = (32,4,10)
            nn.ReLU(),                      # activation
            nn.MaxPool2d(2),                # output shape (32, 2, 5)
        )
        self.fcon = nn.Linear( 32 * 5 * 2 , 120)
        self.fcon2 = nn.Linear(120,3) # fully connected layer, output 3 classes

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        x = self.fcon(x)
        output = self.fcon2 (x)
        return output

The loss function is cross-entropy and the optimizer is stochastic gradient descent (SGD):

model = Action_Net()    # instantiate the convolutional neural network
print(model)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)   # optimize all cnn parameters

Training and Testing

Run a test every 100 training batches, and save the model at the end:

writer = SummaryWriter(comment='Action_Net')
for epoch in range(5):
    for step,(inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()  # clear last grad
        inputs = inputs.float()   # DataLoader yields float64 tensors; the net expects float32
        out = model(inputs)
        loss = criterion(out, labels)  # calculate loss
        loss.backward()  # loss backward, calculate new data
        optimizer.step()  # add new weight to net parameters
        writer.add_graph(model, inputs)
        writer.add_scalar('Loss', loss, epoch*100+step)
        if step % 100 == 0:
            for i,(test_data,test_label) in enumerate(test_loader):
                test_data = test_data.float()
                test_output = model(test_data)
                pred_y = torch.max(test_output, 1)[1].data.numpy()
                accuracy = float((pred_y == test_label.data.numpy()).astype(int).sum()) / float(test_label.size(0))
                writer.add_scalar('Accuracy', accuracy, epoch*100+step)
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)
writer.close()
torch.save(model, 'model/net.pkl') 

Results

Use TensorBoard:

$ tensorboard --logdir Aug26_10-06-35_MCCHENAction_Net/

Then open http://localhost:6006 in a browser.
The network structure is shown below.

The training accuracy and loss curves are shown below.

Recognition

The recognition code follows.
Note: change openpose_path and params["model_folder"] to your own OpenPose directories, and point model = torch.load('model/net.pkl') at the saved model. Because torch.save stored the entire model object, the Action_Net class definition must be available (it is repeated in this script) before torch.load can unpickle it.

# From Python
# It requires OpenCV installed for Python
import sys
import cv2
import os
from sys import platform
import argparse
import time

# Import Openpose (Windows/Ubuntu/OSX)
dir_path = os.path.dirname(os.path.realpath(__file__))
openpose_path= "~/openpose-master/build/python"

try:
    # Windows Import
    if platform == "win32":
        # Change these variables to point to the correct folder (Release/x64 etc.)
        sys.path.append(dir_path + '/../../python/openpose/Release');
        os.environ['PATH']  = os.environ['PATH'] + ';' + dir_path + '/../../x64/Release;' +  dir_path + '/../../bin;'
        import pyopenpose as op
    else:
        # Change these variables to point to the correct folder (Release/x64 etc.)
        sys.path.append(openpose_path);
        # If you run `make install` (default path is `/usr/local/python` for Ubuntu), you can also access the OpenPose/python module from there. This will install OpenPose and the python library at your desired installation path. Ensure that this is in your python path in order to use it.
        # sys.path.append('/usr/local/python')
        from openpose import pyopenpose as op
except ImportError as e:
    print('Error: OpenPose library could not be found. Did you enable `BUILD_PYTHON` in CMake and have this Python script in the right folder?')
    raise e

# Flags
parser = argparse.ArgumentParser()
parser.add_argument("--image_dir", default="../../../examples/media/", help="Process a directory of images. Read all standard formats (jpg, png, bmp, etc.).")
parser.add_argument("--no_display", default=False, help="Enable to disable the visual display.")
args = parser.parse_known_args()

# Custom Params (refer to include/openpose/flags.hpp for more parameters)
params = dict()
params["model_folder"] = "/home/zhangyp/WorkSpace/openpose-master/models/"

# Add others in path?
for i in range(0, len(args[1])):
    curr_item = args[1][i]
    if i != len(args[1])-1: next_item = args[1][i+1]
    else: next_item = "1"
    if "--" in curr_item and "--" in next_item:
        key = curr_item.replace('-','')
        if key not in params:  params[key] = "1"
    elif "--" in curr_item and "--" not in next_item:
        key = curr_item.replace('-','')
        if key not in params: params[key] = next_item

# Construct it from system arguments
# op.init_argv(args[1])
# oppython = op.OpenposePython()

try:
    # Starting OpenPose
    opWrapper = op.WrapperPython()
    opWrapper.configure(params)
    opWrapper.start()

    start = time.time()
    cap = cv2.VideoCapture(0)
    import torch
    import torch.nn as nn
    import numpy as np
    class Action_Net(nn.Module):
        def __init__(self):
            super(Action_Net, self).__init__()
            self.conv1 = nn.Sequential(  # input shape (1, 3, 25)
                nn.Conv2d(
                    in_channels=1,      # input height
                    out_channels=16,    # n_filters
                    kernel_size=(2,6),  # filter size
                    stride=1,           # filter movement/step
                    padding=1,          # padding=(kernel_size-1)/2 height and width don't change (3+2,25+2)
                ),  # output shape (16,5-2+1,27-6+1) = (16,4,22)
                nn.ReLU(),              # activation
                nn.MaxPool2d(kernel_size=(2,2)),  # downsample with a 2x2 window, output shape (16, 2, 11)
            )
            self.conv2 = nn.Sequential(         # input shape (16, 2, 11)
                nn.Conv2d(
                    in_channels=16,  # input height
                    out_channels=32,  # n_filters
                    kernel_size= (1,4),  # filter size
                    stride=1,  # filter movement/step
                    padding=1,  # padding=(kernel_size-1)/2 height and width don't change (2+2,11+2)
                ), # output shape (32,  13-4+1, 4-1+1) = (32,4,10)
                nn.ReLU(),                      # activation
                nn.MaxPool2d(2),                # output shape (32, 2, 5)
            )
            self.fcon = nn.Linear( 32 * 5 * 2 , 120)
            self.fcon2 = nn.Linear(120,3) # fully connected layer, output 3 classes
            #self.softmax = nn.Softmax()

        def forward(self, x):
            x = self.conv1(x)
            x = self.conv2(x)
            x = x.view(x.size(0), -1)
            x = self.fcon(x)
            output = self.fcon2 (x)
            #output = self.softmax(output)
            return output

    model = torch.load('model/net.pkl')
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break
        frame = cv2.resize(frame,(960,720))
        datum = op.Datum()
        datum.cvInputData = frame

        opWrapper.emplaceAndPop([datum])
        out_frame = datum.cvOutputData
        counter = 0
        if datum.poseKeypoints.shape == (1, 25, 3):
            for data in datum.poseKeypoints:
                for n in range(25):
                    for i in range(3):
                        if data[n][i] != 0:
                            counter = counter +1
        if counter > 60:
            input = torch.from_numpy(datum.poseKeypoints)
            input = input.reshape(1, 1, 3, 25)
            input = torch.tensor(input, dtype=torch.float32)
            result = model(input)
            prediction = torch.max(result, 1)[1].data.numpy()
            # print(prediction)
            text = ""
            if prediction == 0:
                text = "Stand"
            if prediction ==1:
                text = "Hands Up"
            if prediction == 2:
                text = "Sit"
            print(text)
            cv2.putText(out_frame,text,(10,30),cv2.FONT_HERSHEY_SIMPLEX,1.2,(0,255,0),2)

            #print("Body keypoints: \n")
            #input = torch.from_numpy(datum.poseKeypoints)
            #print(input)
        cv2.putText(out_frame,"PoseDetector By Michael.Chen",(700,700),cv2.FONT_HERSHEY_SIMPLEX,0.5,(255,255,0),1)
        cv2.imshow("OpenPose 1.5.0 - Tutorial Python API", out_frame)
        key = cv2.waitKey(15)
        if key == 27: break

    end = time.time()
    print("OpenPose demo successfully finished. Total time: " + str(end - start) + " seconds")
except Exception as e:
    print(e)
    sys.exit(-1)

Results