This article walks through age and gender estimation based on a Convolutional Neural Network and TensorFlow; we hope it serves as a useful reference for developers working on this problem.
Training data processing
IMDB data extraction
gender: 0 for female and 1 for male, NaN if unknown
age: divided into 101 classes, one per year from 0 to 100.
Convert the training data to TFRecord format with the command:
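Because age is treated as a 101-way classification, the final prediction is typically recovered as the expected value of the softmax distribution rather than the argmax. A minimal sketch, assuming an age_logits tensor of shape [batch, 101] (the tensor name is hypothetical):
import tensorflow as tf

age_prob = tf.nn.softmax(age_logits)                        # [batch, 101]
age_values = tf.constant(list(range(101)), dtype=tf.float32)
age_pred = tf.reduce_sum(age_prob * age_values, axis=1)     # expected age per sample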
python convert_to_records_multiCPU.py --imdb --nworks 8 --imdb_db /home/research/data/hjimce/classifyData/age_gender/imdb_crop/imdb.mat --base_path /home/research/data/hjimce/classifyData/age_gender/
The imdb.mat file contains:
data = {"file_name": full_path, "gender": gender, "age": age, "score": face_score,"second_score": second_face_score}
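For reference, these fields can be pulled out of the .mat file with scipy.io.loadmat. A minimal sketch assuming the standard IMDB-WIKI metadata layout (a top-level 'imdb' struct; the conversion script may differ in detail):
from scipy.io import loadmat

meta = loadmat('imdb.mat')          # path as passed via --imdb_db
imdb = meta['imdb'][0, 0]
full_path = imdb['full_path'][0]
gender = imdb['gender'][0]
face_score = imdb['face_score'][0]
second_face_score = imdb['second_face_score'][0]
# age is derived from the 'photo_taken' and 'dob' fields during conversion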
The data is fairly noisy, so after loading the .mat file and building the data dictionary, the images are filtered; that is, face_score and second_face_score must satisfy certain conditions:
if face_score[index] < 1:
    continue
# if (~np.isnan(second_face_score[index])) and second_face_score[index] > 0.0:
#     continue
if not (0 <= ages[index] <= 100):
    continue
if np.isnan(genders[index]):
    continue
Extracting face images
Data processing consists of detecting the face bounding box in the input image and aligning the face with an affine transformation:
# load the input image, resize it, and convert it to grayscale
image = cv2.imread(os.path.join(image_base_dir, str(file_name[index][0])), cv2.IMREAD_COLOR)
# image = imutils.resize(image, width=256)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
rects = detector(gray, 2)
if len(rects) != 1:
    continue
else:
    image_raw = fa.align(image, gray, rects[0])
    image_raw = image_raw.tostring()
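The detector and aligner used above are not defined in the snippet; a minimal setup sketch using dlib and imutils (the landmark-model path and the desiredFaceWidth value are assumptions, not taken from the repository):
import dlib
from imutils.face_utils import FaceAligner

# HOG-based frontal face detector and 68-point landmark predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
fa = FaceAligner(predictor, desiredFaceWidth=160)  # match the 160x160 network input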
Finally, the age, gender, aligned face data, and image name are saved:
# image_raw = images[index].tostring()
example = tf.train.Example(features=tf.train.Features(feature={
    # 'height': _int64_feature(rows),
    # 'width': _int64_feature(cols),
    # 'depth': _int64_feature(depth),
    'age': _int64_feature(int(ages[index])),
    'gender': _int64_feature(int(genders[index])),
    'image_raw': _bytes_feature(image_raw),
    'file_name': _bytes_feature(str(file_name[index][0]))}))
writer.write(example.SerializeToString())
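Here _int64_feature and _bytes_feature are the usual TFRecord helpers and writer is a TFRecordWriter; for completeness (standard TensorFlow 1.x boilerplate, not specific to this repository, and the output file name is hypothetical):
import tensorflow as tf

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
    # value must be bytes under Python 3
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

writer = tf.python_io.TFRecordWriter('train-000.tfrecords')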
The face alignment algorithm is:
from imutils.face_utils import FaceAligner
For the face, we can specify the desired eye positions and output face size:
class FaceAligner:
    def __init__(self, predictor, desiredLeftEye=(0.35, 0.35),
                 desiredFaceWidth=256, desiredFaceHeight=None):
        # store the facial landmark predictor, desired output left
        # eye position, and desired output face width + height
        self.predictor = predictor
        self.desiredLeftEye = desiredLeftEye
        self.desiredFaceWidth = desiredFaceWidth
        self.desiredFaceHeight = desiredFaceHeight
        # if the desired face height is None, set it to be the
        # desired face width (normal behavior)
        if self.desiredFaceHeight is None:
            self.desiredFaceHeight = self.desiredFaceWidth
Next, the affine-transformation parameters are computed and the transformation is applied to the face region:
def align(self, image, gray, rect):
    # convert the landmark (x, y)-coordinates to a NumPy array
    shape = self.predictor(gray, rect)
    shape = shape_to_np(shape)
    # extract the left and right eye (x, y)-coordinates
    (lStart, lEnd) = FACIAL_LANDMARKS_IDXS["left_eye"]
    (rStart, rEnd) = FACIAL_LANDMARKS_IDXS["right_eye"]
    leftEyePts = shape[lStart:lEnd]
    rightEyePts = shape[rStart:rEnd]
    # compute the center of mass for each eye
    leftEyeCenter = leftEyePts.mean(axis=0).astype("int")
    rightEyeCenter = rightEyePts.mean(axis=0).astype("int")
    # compute the angle between the eye centroids
    dY = rightEyeCenter[1] - leftEyeCenter[1]
    dX = rightEyeCenter[0] - leftEyeCenter[0]
    angle = np.degrees(np.arctan2(dY, dX)) - 180
    # compute the desired right eye x-coordinate based on the
    # desired x-coordinate of the left eye
    desiredRightEyeX = 1.0 - self.desiredLeftEye[0]
    # determine the scale of the new resulting image by taking
    # the ratio of the distance between eyes in the *current*
    # image to the ratio of distance between eyes in the
    # *desired* image
    dist = np.sqrt((dX ** 2) + (dY ** 2))
    desiredDist = (desiredRightEyeX - self.desiredLeftEye[0])
    desiredDist *= self.desiredFaceWidth
    scale = desiredDist / dist
    # compute center (x, y)-coordinates (i.e., the median point)
    # between the two eyes in the input image
    eyesCenter = ((leftEyeCenter[0] + rightEyeCenter[0]) // 2,
                  (leftEyeCenter[1] + rightEyeCenter[1]) // 2)
    # grab the rotation matrix for rotating and scaling the face
    M = cv2.getRotationMatrix2D(eyesCenter, angle, scale)
    # update the translation component of the matrix
    tX = self.desiredFaceWidth * 0.5
    tY = self.desiredFaceHeight * self.desiredLeftEye[1]
    M[0, 2] += (tX - eyesCenter[0])
    M[1, 2] += (tY - eyesCenter[1])
    # apply the affine transformation
    (w, h) = (self.desiredFaceWidth, self.desiredFaceHeight)
    output = cv2.warpAffine(image, M, (w, h),
                            flags=cv2.INTER_CUBIC)
    # return the aligned face
    return output
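Note that FaceAligner defaults to a 256×256 output while the network below expects a 160×160 input, so either pass desiredFaceWidth=160 when constructing the aligner (as in the setup sketch earlier) or resize the aligned face before writing it to the TFRecord.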
The processed data is stored under /home/research/data/hjimce/classifyData/age_gender/.
Model structure analysis
The model structure is Inception-ResNet-v1.
The input size is 160×160×3.
The stem consists of six convolutional layers (plus one max-pooling layer):
# 149 x 149 x 32
net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',scope='Conv2d_1a_3x3')
end_points['Conv2d_1a_3x3'] = net
# 147 x 147 x 32
net = slim.conv2d(net, 32, 3, padding='VALID',scope='Conv2d_2a_3x3')
end_points['Conv2d_2a_3x3'] = net
# 147 x 147 x 64
net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
end_points['Conv2d_2b_3x3'] = net
# 73 x 73 x 64
net = slim.max_pool2d(net, 3, stride=2, padding='VALID',scope='MaxPool_3a_3x3')
end_points['MaxPool_3a_3x3'] = net
# 73 x 73 x 80
net = slim.conv2d(net, 80, 1, padding='VALID',scope='Conv2d_3b_1x1')
end_points['Conv2d_3b_1x1'] = net
# 71 x 71 x 192
net = slim.conv2d(net, 192, 3, padding='VALID',scope='Conv2d_4a_3x3')
end_points['Conv2d_4a_3x3'] = net
# 35 x 35 x 256
net = slim.conv2d(net, 256, 3, stride=2, padding='VALID',scope='Conv2d_4b_3x3')
end_points['Conv2d_4b_3x3'] = net
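One caveat: the spatial sizes in the comments above (149×149 and so on) come from the reference Inception code, which assumes a 299×299 input. With the 160×160×3 input used here, the actual feature-map sizes are 79 → 77 → 77 → 38 → 38 → 36 → 17, so block35 below operates on 17×17×256 maps. A quick sanity-check helper (plain size arithmetic, not code from the repository):
def out_size(size, kernel, stride=1, padding='VALID'):
    """Spatial output size of a square conv/pool layer."""
    if padding == 'VALID':
        return (size - kernel) // stride + 1
    return (size + stride - 1) // stride  # SAME: ceil(size / stride)

s = 160
for kernel, stride, padding in [(3, 2, 'VALID'), (3, 1, 'VALID'), (3, 1, 'SAME'),
                                (3, 2, 'VALID'), (1, 1, 'VALID'), (3, 1, 'VALID'),
                                (3, 2, 'VALID')]:
    s = out_size(s, kernel, stride, padding)
    print(s)  # 79, 77, 77, 38, 38, 36, 17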
Next come five block35 layers:
# Inception-Resnet-A
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
    """Builds the 35x35 resnet block."""
    with tf.variable_scope(scope, 'Block35', [net], reuse=reuse):
        with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
        with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
            tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
            tower_conv2_1 = slim.conv2d(tower_conv2_0, 32, 3, scope='Conv2d_0b_3x3')
            tower_conv2_2 = slim.conv2d(tower_conv2_1, 32, 3, scope='Conv2d_0c_3x3')
        mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3)
        up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                         activation_fn=None, scope='Conv2d_1x1')
        net += scale * up
        if activation_fn:
            net = activation_fn(net)
    return net
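In the reference FaceNet-style implementation, the five blocks are stacked with slim.repeat; the scale value 0.17 and the end-point name are taken from that implementation and should be treated as assumptions for this repository:
# 5 x Inception-Resnet-A
net = slim.repeat(net, 5, block35, scale=0.17)
end_points['Mixed_5a'] = net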
Reduction-A:
def reduction_a(net, k, l, m, n):
    with tf.variable_scope('Branch_0'):
        tower_conv = slim.conv2d(net, n, 3, stride=2, padding='VALID',
                                 scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_1'):
        tower_conv1_0 = slim.conv2d(net, k, 1, scope='Conv2d_0a_1x1')
        tower_conv1_1 = slim.conv2d(tower_conv1_0, l, 3,
                                    scope='Conv2d_0b_3x3')
        tower_conv1_2 = slim.conv2d(tower_conv1_1, m, 3,
                                    stride=2, padding='VALID',
                                    scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_2'):
        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
    return net
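In the reference implementation, Reduction-A is called with k, l, m, n = 192, 192, 256, 384, turning the 17×17×256 maps into 8×8×896 (384 + 256 + 256 channels across the three branches); the exact constants are an assumption carried over from FaceNet:
# Reduction-A
with tf.variable_scope('Mixed_6a'):
    net = reduction_a(net, 192, 192, 256, 384)
end_points['Mixed_6a'] = net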
Then 10 Inception-Resnet-B layers, 8×8×896:
net = slim.repeat(net, 10, block17, scale=0.10)
end_points['Mixed_6b'] = net
def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
    """Builds the 17x17 resnet block."""
    with tf.variable_scope(scope, 'Block17', [net], reuse=reuse):
        with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 128, 1, scope='Conv2d_1x1')
        with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 128, [1, 7],
                                        scope='Conv2d_0b_1x7')
            tower_conv1_2 = slim.conv2d(tower_conv1_1, 128, [7, 1],
                                        scope='Conv2d_0c_7x1')
        mixed = tf.concat([tower_conv, tower_conv1_2], 3)
        up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                         activation_fn=None, scope='Conv2d_1x1')
        net += scale * up
        if activation_fn:
            net = activation_fn(net)
    return net
Reduction-B:
def reduction_b(net):
    with tf.variable_scope('Branch_0'):
        tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                   padding='VALID', scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_1'):
        tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv1_1 = slim.conv2d(tower_conv1, 256, 3, stride=2,
                                    padding='VALID', scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_2'):
        tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv2_1 = slim.conv2d(tower_conv2, 256, 3,
                                    scope='Conv2d_0b_3x3')
        tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, stride=2,
                                    padding='VALID', scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_3'):
        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
    net = tf.concat([tower_conv_1, tower_conv1_1,
                     tower_conv2_2, tower_pool], 3)
    return net
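As with Reduction-A, the reference code wraps the call in a variable scope (an assumption carried over from FaceNet's inception_resnet_v1); the four branches concatenate to 384 + 256 + 256 + 896 = 1792 channels at 3×3:
# Reduction-B
with tf.variable_scope('Mixed_7a'):
    net = reduction_b(net)
end_points['Mixed_7a'] = net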
Then 5 Inception-Resnet-C layers, 3×3×1792:
net = slim.repeat(net, 5, block8, scale=0.20)
# Inception-Resnet-C
def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
    """Builds the 8x8 resnet block."""
    with tf.variable_scope(scope, 'Block8', [net], reuse=reuse):
        with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
        with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 192, [1, 3],
                                        scope='Conv2d_0b_1x3')
            tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [3, 1],
                                        scope='Conv2d_0c_3x1')
        mixed = tf.concat([tower_conv, tower_conv1_2], 3)
        up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                         activation_fn=None, scope='Conv2d_1x1')
        net += scale * up
        if activation_fn:
            net = activation_fn(net)
    return net
Finally, one more Inception-Resnet-C layer, 3×3×1792, applied without an activation function:
net = block8(net, activation_fn=None)
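After the final block, the network typically ends with global average pooling, flattening, and dropout, followed by two softmax heads: 101-way for age and 2-way for gender. The snippet below is a minimal sketch of such a head, assuming slim is in scope and keep_prob is the dropout keep probability (e.g. 0.8, matching --keep_prob); the exact scope and layer names in the repository may differ:
# Global average pooling over the 3x3 spatial grid -> 1x1x1792
net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                      scope='AvgPool_1a')
net = slim.flatten(net)
net = slim.dropout(net, keep_prob, scope='Dropout')
# Two classification heads: 101 age bins and 2 gender classes
age_logits = slim.fully_connected(net, 101, activation_fn=None,
                                  scope='Logits/Age')
gender_logits = slim.fully_connected(net, 2, activation_fn=None,
                                     scope='Logits/Gender')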
Training
CUDA_VISIBLE_DEVICES=0 python train.py --images /home/research/data/hjimce/classifyData/age_gender/train --lr 1e-3 --weight_decay 1e-5 --epoch 6 --batch_size 128 --keep_prob 0.8 --cuda
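Both heads are presumably trained with softmax cross-entropy. A minimal sketch of the combined objective, assuming age_labels in [0, 100], gender_labels in {0, 1}, and the two logits tensors from the head sketch above (the actual loss weighting in the repository may differ):
age_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=age_labels, logits=age_logits))
gender_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=gender_labels, logits=gender_logits))
total_loss = age_loss + gender_loss  # plus L2 weight decay from --weight_decay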
Testing
Single-image prediction
Download the model, place it under models/, and run:
CUDA_VISIBLE_DEVICES=0 python eval.py --I "./demo/demo.jpg" --M "./models/" --font_scale 1 --thickness 1
Running result:
Testing accuracy on multiple images
CUDA_VISIBLE_DEVICES=0 python test.py --images /home/research/data/hjimce/classifyData/age_gender/test
a. Accuracy of the provided pretrained model:
Age_MAE:7.21,Gender_Acc:80.32%,Loss:4.37
b. Accuracy of the model trained here:
Age_MAE:7.55,Gender_Acc:79.24%,Loss:4.33
c. After modifying the model structure (model size 24 MB), test accuracy:
Age_MAE:7.57,Gender_Acc:78.85%,Loss:4.29
This concludes the article on age and gender estimation based on a Convolutional Neural Network and TensorFlow; we hope it is helpful.