This post walks through building a cat-face generator with a DCGAN in TensorFlow.
The code follows this write-up: https://zhuanlan.zhihu.com/p/28329335
The network architecture follows this repo: https://github.com/AlexiaJM/Deep-learning-with-cats
Dataset: the same one used in the second reference, http://academictorrents.com/details/c501571c29d16d7f41d159d699d0e7fb37092cbd
The dataset contains photos of whole cats, but all we need are the faces, so I used the method from the second reference to crop them out; the dataset provides coordinates for each cat's facial landmarks (see the sketch below).
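A minimal sketch of that cropping step, assuming the dataset's usual annotation layout: each image ships with a companion .cat file whose first number is the point count (9), followed by x,y pairs for the eyes, mouth, and ears. The function name crop_cat_face and the margin heuristic are my own illustration, not the exact crop used in the reference repo:

import numpy as np
from PIL import Image

def crop_cat_face(img_path, size=64):
    # the annotation file sits next to the image: "<image>.cat"
    pts = np.loadtxt(img_path + '.cat')[1:].reshape(-1, 2)  # drop the leading point count
    img = Image.open(img_path)
    cx, cy = pts.mean(axis=0)          # rough face centre from the landmarks
    half = int(pts[:, 0].ptp() * 0.6)  # half-width from the horizontal landmark spread
    box = (int(cx - half), int(cy - half), int(cx + half), int(cy + half))
    return img.crop(box).resize((size, size))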
You can also use the cat-face dataset I prepared, stored as tfrecords: https://pan.baidu.com/s/1DBNWE_8r2sp96MuKKwSqyw
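If you would rather build the tfrecords file yourself, a sketch like the following should do; it writes the same feature names ('label' and 'img_raw') that the loader in the full code expects (write_tfrecord is a hypothetical helper name):

import numpy as np
import tensorflow as tf

def write_tfrecord(faces, out_path='cat_train_64_total.tfrecords'):
    # faces: iterable of uint8 arrays shaped (64, 64, 3)
    with tf.python_io.TFRecordWriter(out_path) as writer:
        for face in faces:
            example = tf.train.Example(features=tf.train.Features(feature={
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
                'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[np.asarray(face, dtype=np.uint8).tobytes()]))}))
            writer.write(example.SerializeToString())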
generator structure: fully connected layer + transposed conv + transposed conv + transposed conv + transposed conv
discriminator structure: conv + conv + conv + conv + fully connected layer
Because the images here are 64*64, I used 4 conv layers; at 128*128 you would add one more layer, and at 32*32 drop one.
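The arithmetic behind that rule: every stride-2 convolution halves the spatial size (and every stride-2 transposed convolution doubles it), and both networks meet the dense layer at a 4*4 feature map:

import math

def n_conv_layers(img_size, bottleneck=4):
    # number of stride-2 halvings from img_size down to the 4x4 bottleneck
    return int(math.log2(img_size // bottleneck))

print(n_conv_layers(64))   # 4
print(n_conv_layers(128))  # 5
print(n_conv_layers(32))   # 3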
On activation functions: I tried leaky ReLU, ReLU, and SELU, and SELU worked best (judged by eye; I only trained up to 200 epochs). Note that SELU is self-normalizing; if you use one of the others, consider adding normalization.
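For instance, swapping SELU for leaky ReLU in a discriminator block would look roughly like this sketch (conv_block is a hypothetical helper, not part of the code below); note that the optimizer() function in the full code already wraps minimize() in the UPDATE_OPS dependency that batch normalization needs:

import tensorflow as tf

def conv_block(x, filters, is_train):
    # stride-2 conv, then batch norm, because leaky ReLU is not self-normalizing
    x = tf.layers.conv2d(x, filters, 3, strides=2, padding='same')
    x = tf.layers.batch_normalization(x, training=is_train)
    return tf.nn.leaky_relu(x, alpha=0.2)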
On learning rates: in the GAN paper I read, the generator is trained for seven rounds before the discriminator is trained once. Again following the second reference, G's learning rate is set to 0.0002 and D's to 0.00005. Because convergence was slow, I tried raising the learning rate to 0.001, and the result overfit. Frankly this learning rate has to be found by trial and error; luckily someone has already done the work.
For the loss function, plain cross-entropy.
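Concretely that means the standard sigmoid cross-entropy GAN objective, plus the one-sided label smoothing used in the second reference (real labels are 0.9 rather than 1.0). A compact version of what the loss() function in the full code computes:

import tensorflow as tf

def gan_losses(d_logits_real, d_logits_fake, smooth=0.9):
    ce = tf.nn.sigmoid_cross_entropy_with_logits
    # the generator wants fakes classified as (smoothed) real
    g_loss = tf.reduce_mean(ce(logits=d_logits_fake,
                               labels=tf.ones_like(d_logits_fake) * smooth))
    # the discriminator wants reals near 0.9 and fakes near 0
    d_loss = tf.reduce_mean(ce(logits=d_logits_real,
                               labels=tf.ones_like(d_logits_real) * smooth)) \
           + tf.reduce_mean(ce(logits=d_logits_fake,
                               labels=tf.zeros_like(d_logits_fake)))
    return g_loss, d_loss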
Generated samples after 170 epochs:
After 300 epochs:
On to the code. It is basically the same as the first reference; that author's code is really readable:
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 7 16:42:44 2018
@author: Administrator
"""
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

tf.reset_default_graph()

# hyperparameters
batch_size = 64
noise_size = 100
epochs = 800
sample_num = 16
learning_rate = 0.001   # tried this and it overfit; not used below
d_learning_rate = 0.00005
g_learning_rate = 0.0002
beta1 = 0.4             # defined but unused below
keep_prob = 0.8
dataset_path = 'drive//cat_train_64_total.tfrecords'
save_path = 'drive//ganpic1'

def load_cat_record():
    # read the whole TFRecord file into memory via the TF1 queue runners
    filename_queue = tf.train.string_input_producer([dataset_path])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={'label': tf.FixedLenFeature([], tf.int64),
                  'img_raw': tf.FixedLenFeature([], tf.string)})
    image = tf.decode_raw(features['img_raw'], tf.uint8)
    image = tf.reshape(image, [64, 64, 3])
    label = tf.cast(features['label'], tf.int32)
    train_img = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for i in range(9416):  # number of images in the record
            example, l = sess.run([image, label])
            train_img.append(example)
        coord.request_stop()
        coord.join(threads)
    # min-max normalize each pixel to [0, 1]
    # (sklearn worked on Colab but broke on my own machine, so I do it by hand)
    images = np.array(train_img, dtype='float32')
    print(images.shape)
    x_train_row = images.reshape(images.shape[0], 64 * 64 * 3)
    x_max_row = x_train_row.max(axis=0)
    x_min_row = x_train_row.min(axis=0)
    x_normal = (x_train_row - x_min_row) / (x_max_row - x_min_row)
    # back to 64 x 64 x 3
    images = x_normal.reshape(x_normal.shape[0], 64, 64, 3)
    print(images.shape)
    return images

def generator(noise_img, output_dim, is_train=True, keep_prob=keep_prob):
    with tf.variable_scope("generator", reuse=tf.AUTO_REUSE):
        # noise to 4 * 4 * 1024
        layer1 = tf.layers.dense(noise_img, 4 * 4 * 1024)
        layer1 = tf.reshape(layer1, [-1, 4, 4, 1024])
        #layer1 = tf.layers.batch_normalization(layer1, training=is_train)
        layer1 = tf.nn.selu(layer1)
        layer1 = tf.nn.dropout(layer1, keep_prob=keep_prob)
        # 4 * 4 * 1024 to 8 * 8 * 512
        # tf.layers.conv2d_transpose is a transposed convolution; think of
        # strides as the upscaling factor; the second argument is the number
        # of output filters, the third is the kernel size
        layer2 = tf.layers.conv2d_transpose(layer1, 512, 3, strides=2, padding='same')
        #layer2 = tf.layers.batch_normalization(layer2, training=is_train)
        layer2 = tf.nn.selu(layer2)
        layer2 = tf.nn.dropout(layer2, keep_prob=keep_prob)
        # 8 * 8 * 512 to 16 * 16 * 256
        layer3 = tf.layers.conv2d_transpose(layer2, 256, 4, strides=2, padding='same')
        #layer3 = tf.layers.batch_normalization(layer3, training=is_train)
        layer3 = tf.nn.selu(layer3)
        layer3 = tf.nn.dropout(layer3, keep_prob=keep_prob)
        # 16 * 16 * 256 to 32 * 32 * 128
        layer4 = tf.layers.conv2d_transpose(layer3, 128, 3, strides=2, padding='same')
        #layer4 = tf.layers.batch_normalization(layer4, training=is_train)
        layer4 = tf.nn.selu(layer4)
        layer4 = tf.nn.dropout(layer4, keep_prob=keep_prob)
        # to 64 * 64 * 3
        logits = tf.layers.conv2d_transpose(layer4, output_dim, 3, strides=2, padding='same')
        output = tf.tanh(logits)
        return output

def discriminator(input_img, keep_prob=keep_prob):
    with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
        # 64 * 64 * 3 to 32 * 32 * 128
        layer1 = tf.layers.conv2d(input_img, 128, 3, strides=2, padding='same')
        layer1 = tf.nn.selu(layer1)
        layer1 = tf.nn.dropout(layer1, keep_prob=keep_prob)
        # 32 * 32 * 128 to 16 * 16 * 256
        layer2 = tf.layers.conv2d(layer1, 256, 3, strides=2, padding='same')
        #layer2 = tf.layers.batch_normalization(layer2, training=True)
        layer2 = tf.nn.selu(layer2)
        layer2 = tf.nn.dropout(layer2, keep_prob=keep_prob)
        # 16 * 16 * 256 to 8 * 8 * 512
        layer3 = tf.layers.conv2d(layer2, 512, 3, strides=2, padding='same')
        #layer3 = tf.layers.batch_normalization(layer3, training=True)
        layer3 = tf.nn.selu(layer3)
        layer3 = tf.nn.dropout(layer3, keep_prob=keep_prob)
        # 8 * 8 * 512 to 4 * 4 * 1024
        layer4 = tf.layers.conv2d(layer3, 1024, 3, strides=2, padding='same')
        #layer4 = tf.layers.batch_normalization(layer4, training=True)
        layer4 = tf.nn.selu(layer4)
        layer4 = tf.nn.dropout(layer4, keep_prob=keep_prob)
        # flatten and classify
        flatten = tf.reshape(layer4, (-1, 4 * 4 * 1024))
        logits = tf.layers.dense(flatten, 1)
        output = tf.sigmoid(logits)
        return logits, output

def loss(input_real, input_noise, image_depth):
    g_output = generator(input_noise, image_depth, is_train=True)
    d_logits_real, d_output_real = discriminator(input_real)
    d_logits_fake, d_output_fake = discriminator(g_output)
    # one-sided label smoothing: real labels are 0.9 instead of 1.0
    g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=d_logits_fake, labels=tf.ones_like(d_output_fake) * 0.9))
    d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=d_logits_real, labels=tf.ones_like(d_output_real) * 0.9))
    d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=d_logits_fake, labels=tf.zeros_like(d_output_fake)))
    d_loss = tf.add(d_loss_real, d_loss_fake)
    return g_loss, d_loss

def optimizer(g_loss, d_loss, g_learning_rate=g_learning_rate, d_learning_rate=d_learning_rate):
    train_vars = tf.trainable_variables()
    g_vars = [var for var in train_vars if var.name.startswith("generator")]
    d_vars = [var for var in train_vars if var.name.startswith("discriminator")]
    # separate Adam optimizers so G and D can use different learning rates
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        g_opt = tf.train.AdamOptimizer(g_learning_rate).minimize(g_loss, var_list=g_vars)
        d_opt = tf.train.AdamOptimizer(d_learning_rate).minimize(d_loss, var_list=d_vars)
    return g_opt, d_opt

def show_image(sess, input_noise, data_shape, epoch, sample_num=sample_num):
    sample_noise = get_noise(sample_num, noise_size)
    # dropout off (keep_prob=1) when sampling
    samples = sess.run(generator(input_noise, data_shape[-1], keep_prob=1),
                       feed_dict={input_noise: sample_noise})
    samples = (samples + 1) / 2  # tanh output back to [0, 1]
    fig = plt.figure(figsize=(12, 12))
    gs = gridspec.GridSpec(4, 4)
    for i, sample in enumerate(samples):
        ax = plt.subplot(gs[i])
        ax.set_aspect('equal')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        plt.imshow(sample.reshape(data_shape[1], data_shape[2], data_shape[3]), cmap='Greys_r')
    plt.savefig(save_path + '//{}.png'.format(str(epoch).zfill(4)), bbox_inches='tight')
    plt.close(fig)

def get_input(img_width, img_height, img_depth, noise_size):
    input_real = tf.placeholder(tf.float32, [None, img_width, img_height, img_depth], name='input_real')
    input_noise = tf.placeholder(tf.float32, [None, noise_size], name='input_noise')
    return input_real, input_noise

def get_noise(batch_size, noise_size):
    return np.random.uniform(-1, 1, size=(batch_size, noise_size))

def train(noise_size, data_shape, batch_size):
    input_real, input_noise = get_input(data_shape[1], data_shape[2], data_shape[3], noise_size)
    g_loss, d_loss = loss(input_real, input_noise, data_shape[-1])
    g_opt, d_opt = optimizer(g_loss, d_loss, d_learning_rate=d_learning_rate, g_learning_rate=g_learning_rate)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(epochs):
            for j in range(images.shape[0] // batch_size - 1):
                batch_image = images[j * batch_size : (j + 1) * batch_size]
                batch_image = batch_image * 2 - 1  # rescale [0, 1] to [-1, 1] for tanh
                batch_noise = get_noise(batch_size, noise_size)
                sess.run(g_opt, feed_dict={input_real: batch_image, input_noise: batch_noise})
                sess.run(d_opt, feed_dict={input_real: batch_image, input_noise: batch_noise})
            # report losses and save a sample grid once per epoch
            print('round :', i)
            print('G loss: ', g_loss.eval({input_real: batch_image, input_noise: batch_noise}))
            print('D loss: ', d_loss.eval({input_real: batch_image, input_noise: batch_noise}))
            print('')
            show_image(sess, input_noise, data_shape, epoch=i)

with tf.Graph().as_default():
    images = load_cat_record()
    train(noise_size, [-1, 64, 64, 3], batch_size)