使用TensorFlow Object Detection API进行红绿灯检测

本文主要是介绍使用TensorFlow Object Detection API进行红绿灯检测，希望对大家解决编程问题提供一定的参考价值，需要的开发者们随着小编来一起学习吧！

项目目录结构

本文中未明确说明的情况下，所使用的路径均在./research目录下。

research
- object_detection
- datasets
  - my_traffic_light (参照Pascal VOC目录结构)
    - Annotations
    - ImageSets
    - JPEGImages
    - SegmentationClass
    - SegmentationObject
    - tfrecord
      ***.tfrecord
      ***.pbtxt
- ssd_traffic_light_detection
  - ssd_traffic_light_detection_model
    - saved_model
      - variables
        saved_model.pb
        pipeline.config
        model.ckpt.meta / index / data-00000-of-00001
        frozen_inference_graph.pb
        checkpoint
  - train (主要存放用于启动训练的一些文件，和训练中间文件)
    - export
    - eval_0
      train_cmd.sh (存放一些会用到的训练命令等)
      model.ckpt-*****.meta
      model.ckpt-*****.index
      model.ckpt-*****.data-00000-of-00001
      graph.pbtxt
      events.out.tfevents.*****
      model_name_datasets.config
      pipeline.config
      checkpoint

数据集制作

图像采集

使用华为手机拍摄视频，存为*.mp4文件。

提取图像

extract_images_from_video
测试读取视频文件，查看文件的FPS/H/W和总帧数。

import cv2
import os
# Probe a single video: open it, read the first frame, and report the
# frame rate, frame size and total frame count.
video_path = './JPEGImages/VID_20200419_122755.mp4'
output_dir = './JPEGImages/VID_20200419_122755'
if not os.path.exists(output_dir):
    os.mkdir(output_dir)

cap = cv2.VideoCapture(video_path)
try:
    # Reading one frame confirms that the file can actually be decoded.
    success, frame = cap.read()
    fps = cap.get(cv2.CAP_PROP_FPS)
    n_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    h_frame = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    w_frame = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
finally:
    # Always free the underlying decoder/file handle, even if a query fails.
    cap.release()
print('The video propertities is: fps={}, height={}, width={}, and has {} frames.'.format(fps, h_frame, w_frame, n_frame))

从视频中每隔45帧提取一张图片，提取的图片存放到视频所在目录下、以视频文件名（不含后缀）命名的文件夹中。

def extract_images_from_video(video_path, frame_interval=45):
    """Save every ``frame_interval``-th frame of a video as a JPEG image.

    Selected frames are rotated 90 degrees clockwise and written to a
    directory that sits next to the video and is named after the video file
    (without its extension). Files are named ``<video>_<NNNNN>.jpg`` where
    ``NNNNN`` is the 1-based, zero-padded index of the saved frame.

    Args:
        video_path: Path to the video file to sample.
        frame_interval: Keep one frame out of this many (default 45, the
            value the original script hard-coded).

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be >= 1, got {}".format(frame_interval))

    # splitext only strips the extension, so a name such as "a.b.mp4" keeps
    # its full stem "a.b" instead of being truncated at the first dot.
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    output_dir = os.path.join(os.path.dirname(video_path), video_name)
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    capture = cv2.VideoCapture(video_path)
    frame_idx = 0
    n_saved = 0
    try:
        success, frame = capture.read()
        while success:
            frame_idx += 1
            if frame_idx % frame_interval == 0:
                n_saved += 1
                # NOTE(review): rotation presumably compensates for footage
                # stored sideways by the phone - confirm for other sources.
                rotated = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
                frame_name = "{0}_{1:0>5d}.jpg".format(video_name, n_saved)
                cv2.imwrite(os.path.join(output_dir, frame_name), rotated)
            success, frame = capture.read()
    finally:
        # Release the capture even if decoding or writing raised.
        capture.release()
    print("Finished extract images from {}".format(video_name))


import glob
# Run the frame extraction on every video matching the glob pattern below.
video_files = "./JPEGImages/VID_20200419_*.mp4"
video_filepaths = glob.glob(video_files)
print(video_filepaths)
for path in video_filepaths:
    extract_images_from_video(path)

图像标注

训练

模型导出

进行推理

推理文件

导入包

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfilefrom distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("../../")
from object_detection.utils import ops as utils_opsif StrictVersion(tf.__version__) < StrictVersion('1.9.0'):raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')# This is needed to display the images.
%matplotlib inline
from utils import label_map_util
from utils import visualization_utils as vis_util

# Model identity and location. The download settings from the original
# tutorial (MODEL_FILE / DOWNLOAD_BASE) are left commented out.
MODEL_NAME = 'my_traffic_light_detection'
MODEL_DIR = './model'
# MODEL_FILE = MODEL_NAME + '.tar.gz'
# DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to the frozen detection graph. This is the actual model that is used
# for the object detection.
PATH_TO_FROZEN_GRAPH = '{}/frozen_inference_graph.pb'.format(MODEL_DIR)

# Label map: the list of strings used to add the correct label to each box.
PATH_TO_LABELS = os.path.join('./dataset', 'traffic_light_label_map.pbtxt')

导入计算图

# Import the frozen model into a dedicated tf.Graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # name='' imports the nodes without a prefix, so tensors keep names
        # such as 'image_tensor:0'.
        tf.import_graph_def(od_graph_def, name='')
    # Print every tensor name in the imported graph (useful for checking
    # which detection outputs the exported model provides).
    ops = detection_graph.get_operations()
    all_tensor_names = {output.name for op in ops for output in op.outputs}
    print(all_tensor_names)

# Mapping built from the label map file; passed to the visualization helper
# so that boxes can be labelled.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
print(category_index)


def load_image_into_numpy_array(image):
    """Convert a PIL image into a writable ``(height, width, 3)`` uint8 array.

    The image is converted to RGB first, so inputs in other modes (for
    example grayscale or RGBA) also yield three channels instead of failing
    in a reshape. For RGB input the result is identical to the previous
    ``getdata()``/``reshape`` implementation, but the conversion happens in
    native code rather than through a Python-level pixel sequence.

    Args:
        image: A ``PIL.Image.Image`` instance.

    Returns:
        numpy.ndarray of dtype uint8 with shape ``(height, width, 3)``.
    """
    # np.array (not np.asarray) guarantees a fresh, writable copy; callers
    # draw boxes onto the returned array in place.
    return np.array(image.convert('RGB'), dtype=np.uint8)

import glob
# Directory that holds the images to run the detector on.
PATH_TO_TEST_IMAGES_DIR = './test_images'

# Ten files are expected there: image0.jpg through image9.jpg.
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(index))
    for index in range(10)
]
# TEST_IMAGE_PATHS = glob.glob("./test_images/*.jpg")
print(TEST_IMAGE_PATHS)

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image and return its detections.

    Args:
        image: Image as a numpy array. A batch axis is added before it is fed
            to the graph's ``image_tensor:0`` input; ``image.shape[0]`` and
            ``image.shape[1]`` are used as the target size when masks are
            reframed.
        graph: ``tf.Graph`` that contains the imported detection model.

    Returns:
        dict with the batch dimension removed:
        ``num_detections`` (int), ``detection_classes`` (uint8 array),
        ``detection_boxes``, ``detection_scores`` and, only when the graph
        exposes that tensor, ``detection_masks``.
    """
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors.
            ops = graph.get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in ['num_detections', 'detection_boxes', 'detection_scores',
                        'detection_classes', 'detection_masks']:
                tensor_name = key + ':0'
                # Only fetch outputs that this particular model provides.
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = graph.get_tensor_by_name(tensor_name)

            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image.
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates
                # to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension.
                tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0)

            image_tensor = graph.get_tensor_by_name('image_tensor:0')

            # Run inference.
            output_dict = sess.run(
                tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)})

            # All outputs are float32 numpy arrays, so convert types as
            # appropriate and drop the batch dimension.
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict

import cv2
for image_path in TEST_IMAGE_PATHS:
    # Open the file in a context manager so its handle is closed as soon as
    # the pixels have been copied into a numpy array.
    with Image.open(image_path) as image:
        # The array based representation of the image will be used later in
        # order to prepare the result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
    # Actual detection. The batch dimension is added inside
    # run_inference_for_single_image, so the plain image array is passed.
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    print(output_dict)
    # Visualization of the results of a detection (draws onto image_np).
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=4)
    # Convert RGB to BGR channel order for the OpenCV display calls
    # (currently commented out).
    image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
#   cv2.imshow('image',image_np)
#   cv2.waitKey(10)
#   cv2.destroyAllWindows()
#   if cv2.waitKey(1000)&0xff == 113:
# cv2.destroyAllWindows()
#   plt.figure(figsize=IMAGE_SIZE)
#   plt.imshow(image_np)
# plt.show()