NAS with RL (Using TensorFlow)

2024-01-25 07:12
Tags: using rl tensorflow nas

This article introduces NAS with RL (Using TensorFlow). Hopefully it offers some useful reference for developers working on this kind of problem — follow along and study the code together!

Contents

Code 1:

train.py

net_manager.py

cnn.py

reinforce.py

Code 2:

train.py

controller.py

model.py

manager.py

nascell.py


Code 1:

Source code: nascell-automl-master

Modified code (a few new Python files need to be created):

train.py

import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow_addons as tfa
import argparse
import datetime

from cnn import CNN
from net_manager import NetManager
from reinforce import Reinforce
from tensorflow.examples.tutorials.mnist import input_data


def parse_args():
    desc = "TensorFlow implementation of 'Neural Architecture Search with Reinforcement Learning'"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--max_layers', default=2)
    args = parser.parse_args()
    args.max_layers = int(args.max_layers)
    return args


'''
Policy network is a main network for searching optimal architecture
it uses NAS - Neural Architecture Search recurrent network cell.
https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/contrib/rnn/python/ops/rnn_cell.py#L1363
Args:
    state: current state of required topology
    max_layers: maximum number of layers
Returns:
    3-D tensor with new state (new topology)
'''
def policy_network(state, max_layers):
    with tf.name_scope("policy_network"):
        nas_cell = tfa.rnn.NASCell(4 * max_layers)
        outputs, state = tf.nn.dynamic_rnn(nas_cell,
                                           tf.expand_dims(state, -1),
                                           dtype=tf.float32)
        bias = tf.Variable([0.05] * 4 * max_layers)
        outputs = tf.nn.bias_add(outputs, bias)
        print("outputs: ", outputs, outputs[:, -1:, :],
              tf.slice(outputs, [0, 4 * max_layers - 1, 0], [1, 1, 4 * max_layers]))
        # return tf.slice(outputs, [0, 4*max_layers-1, 0], [1, 1, 4*max_layers]) # Returned last output of rnn
        return outputs[:, -1:, :]


def train(mnist):
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    # note: as in the original repository, 0.99 (not starter_learning_rate) is passed
    # as the initial learning rate to exponential_decay
    learning_rate = tf.train.exponential_decay(0.99, global_step,
                                               500, 0.96, staircase=True)

    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)
    net_manager = NetManager(num_input=784,
                             num_classes=10,
                             learning_rate=0.001,
                             mnist=mnist,
                             bathc_size=100)

    MAX_EPISODES = 2500
    step = 0
    state = np.array([[10.0, 128.0, 1.0, 1.0] * args.max_layers], dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0
    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("ca:", action)
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            print("=====>", reward, pre_acc)
        else:
            reward = -1.0
        total_rewards += reward

        # In our sample action is equal state
        state = action[0]
        reinforce.storeRollout(state, reward)

        step += 1
        ls = reinforce.train_step(1)
        log_str = "current time:  " + str(datetime.datetime.now().time()) + \
                  " episode:  " + str(i_episode) + \
                  " loss:  " + str(ls) + \
                  " last_state:  " + str(state) + \
                  " last_reward:  " + str(reward) + "\n"
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.close()
        print(log_str)


def main():
    global args
    args = parse_args()
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train(mnist)


if __name__ == '__main__':
    tf.disable_v2_behavior()
    main()

net_manager.py

import tensorflow.compat.v1 as tf
from cnn import CNN


class NetManager():
    def __init__(self, num_input, num_classes, learning_rate, mnist,
                 max_step_per_action=5500 * 3,
                 bathc_size=100,
                 dropout_rate=0.85):
        self.num_input = num_input
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.mnist = mnist
        self.max_step_per_action = max_step_per_action
        self.bathc_size = bathc_size
        self.dropout_rate = dropout_rate

    def get_reward(self, action, step, pre_acc):
        action = [action[0][0][x:x + 4] for x in range(0, len(action[0][0]), 4)]
        cnn_drop_rate = [c[3] for c in action]
        with tf.Graph().as_default() as g:
            with g.container('experiment' + str(step)):
                model = CNN(self.num_input, self.num_classes, action)
                loss_op = tf.reduce_mean(model.loss)
                optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
                train_op = optimizer.minimize(loss_op)

                with tf.Session() as train_sess:
                    init = tf.global_variables_initializer()
                    train_sess.run(init)

                    for step in range(self.max_step_per_action):
                        batch_x, batch_y = self.mnist.train.next_batch(self.bathc_size)
                        feed = {model.X: batch_x,
                                model.Y: batch_y,
                                model.dropout_keep_prob: self.dropout_rate,
                                model.cnn_dropout_rates: cnn_drop_rate}
                        _ = train_sess.run(train_op, feed_dict=feed)

                        if step % 100 == 0:
                            # Calculate batch loss and accuracy
                            loss, acc = train_sess.run(
                                [loss_op, model.accuracy],
                                feed_dict={model.X: batch_x,
                                           model.Y: batch_y,
                                           model.dropout_keep_prob: 1.0,
                                           model.cnn_dropout_rates: [1.0] * len(cnn_drop_rate)})
                            print("Step " + str(step) +
                                  ", Minibatch Loss= " + "{:.4f}".format(loss) +
                                  ", Current accuracy= " + "{:.3f}".format(acc))
                    batch_x, batch_y = self.mnist.test.next_batch(10000)
                    loss, acc = train_sess.run(
                        [loss_op, model.accuracy],
                        feed_dict={model.X: batch_x,
                                   model.Y: batch_y,
                                   model.dropout_keep_prob: 1.0,
                                   model.cnn_dropout_rates: [1.0] * len(cnn_drop_rate)})
                    print("!!!!!!acc:", acc, pre_acc)
                    if acc - pre_acc <= 0.01:
                        return acc, acc
                    else:
                        return 0.01, acc
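For reference, NetManager.get_reward reads the flat action vector as consecutive quadruples of (filter size, number of filters, max-pool size, dropout keep rate), one quadruple per layer. A minimal standalone sketch of that slicing, using the initial state defined in train.py (the values are only illustrative):

import numpy as np

max_layers = 2
# the initial state from train.py: one (filter_size, num_filters, pool_size, dropout) block per layer
state = np.array([[10.0, 128.0, 1.0, 1.0] * max_layers], dtype=np.float32)

# get_reward receives actions shaped [1, 1, 4 * max_layers] and slices them per layer
action = state[np.newaxis, ...]  # shape (1, 1, 8), mimicking reinforce.get_action output
layers = [action[0][0][x:x + 4] for x in range(0, len(action[0][0]), 4)]
cnn_drop_rate = [c[3] for c in layers]

print(layers)         # two blocks: filter_size=10, filters=128, pool_size=1, dropout=1
print(cnn_drop_rate)  # [1.0, 1.0]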

cnn.py

import tensorflow.compat.v1 as tf


class CNN():
    def __init__(self, num_input, num_classes, cnn_config):
        cnn = [c[0] for c in cnn_config]
        cnn_num_filters = [c[1] for c in cnn_config]
        max_pool_ksize = [c[2] for c in cnn_config]

        self.X = tf.placeholder(tf.float32,
                                [None, num_input],
                                name="input_X")
        self.Y = tf.placeholder(tf.int32, [None, num_classes], name="input_Y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, [], name="dense_dropout_keep_prob")
        self.cnn_dropout_rates = tf.placeholder(tf.float32, [len(cnn), ], name="cnn_dropout_keep_prob")

        Y = self.Y
        X = tf.expand_dims(self.X, -1)
        pool_out = X
        with tf.name_scope("Conv_part"):
            for idd, filter_size in enumerate(cnn):
                with tf.name_scope("L" + str(idd)):
                    conv_out = tf.layers.conv1d(
                        pool_out,
                        filters=cnn_num_filters[idd],
                        kernel_size=(int(filter_size)),
                        strides=1,
                        padding="SAME",
                        name="conv_out_" + str(idd),
                        activation=tf.nn.relu,
                        kernel_initializer=tf.initializers.glorot_normal(),
                        bias_initializer=tf.zeros_initializer)
                    pool_out = tf.layers.max_pooling1d(
                        conv_out,
                        pool_size=(int(max_pool_ksize[idd])),
                        strides=1,
                        padding='SAME',
                        name="max_pool_" + str(idd))
                    pool_out = tf.nn.dropout(pool_out, self.cnn_dropout_rates[idd])

            flatten_pred_out = tf.layers.flatten(pool_out)
            self.logits = tf.layers.dense(flatten_pred_out, num_classes)

        self.prediction = tf.nn.softmax(self.logits, name="prediction")
        self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=Y, name="loss")
        correct_pred = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name="accuracy")
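Note that CNN keeps the MNIST image flattened: tf.expand_dims turns the [batch, 784] input into a [batch, 784, 1] sequence, so the searched layers are 1-D convolutions over pixel order rather than 2-D convolutions over the image. A minimal graph-construction sketch, assuming TensorFlow 2.x with the tensorflow.compat.v1 API as in the listings above (the two-layer config is made up):

import tensorflow.compat.v1 as tf
from cnn import CNN

tf.disable_v2_behavior()

# hypothetical two-layer config: (filter_size, num_filters, max_pool_ksize, dropout)
cnn_config = [[3, 32, 2, 1.0], [5, 64, 2, 1.0]]

with tf.Graph().as_default():
    model = CNN(num_input=784, num_classes=10, cnn_config=cnn_config)
    print(model.logits)    # Tensor of shape (?, 10)
    print(model.accuracy)  # scalar accuracy op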

reinforce.py

import tensorflow.compat.v1 as tf
import random
import numpy as np


class Reinforce():
    def __init__(self, sess, optimizer, policy_network, max_layers, global_step,
                 division_rate=100.0,
                 reg_param=0.001,
                 discount_factor=0.99,
                 exploration=0.3):
        self.sess = sess
        self.optimizer = optimizer
        self.policy_network = policy_network
        self.division_rate = division_rate
        self.reg_param = reg_param
        self.discount_factor = discount_factor
        self.max_layers = max_layers
        self.global_step = global_step

        self.reward_buffer = []
        self.state_buffer = []

        self.create_variables()
        var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self.sess.run(tf.variables_initializer(var_lists))

    def get_action(self, state):
        return self.sess.run(self.predicted_action, {self.states: state})
        # note: the exploration branch below is unreachable because of the return
        # above; it is kept as in the original repository
        if random.random() < self.exploration:
            return np.array([[random.sample(range(1, 35), 4 * self.max_layers)]])
        else:
            return self.sess.run(self.predicted_action, {self.states: state})

    def create_variables(self):
        with tf.name_scope("model_inputs"):
            # raw state representation
            tf.disable_eager_execution()
            self.states = tf.placeholder(tf.float32, [None, self.max_layers * 4], name="states")

        with tf.name_scope("predict_actions"):
            # initialize policy network
            with tf.variable_scope("policy_network"):
                self.policy_outputs = self.policy_network(self.states, self.max_layers)

            self.action_scores = tf.identity(self.policy_outputs, name="action_scores")
            self.predicted_action = tf.cast(tf.scalar_mul(self.division_rate, self.action_scores), tf.int32,
                                            name="predicted_action")

        # regularization loss
        policy_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")

        # compute loss and gradients
        with tf.name_scope("compute_gradients"):
            # gradients for selecting action from policy network
            self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")

            with tf.variable_scope("policy_network", reuse=True):
                self.logprobs = self.policy_network(self.states, self.max_layers)
                print("self.logprobs", self.logprobs)

            # compute policy loss and regularization loss
            self.cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logprobs[:, -1, :],
                                                                              labels=self.states)
            self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
            self.reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_network_variables])  # Regularization
            self.loss = self.pg_loss + self.reg_param * self.reg_loss

            # compute gradients
            self.gradients = self.optimizer.compute_gradients(self.loss)

            # compute policy gradients
            for i, (grad, var) in enumerate(self.gradients):
                if grad is not None:
                    self.gradients[i] = (grad * self.discounted_rewards, var)

        # training update
        with tf.name_scope("train_policy_network"):
            # apply gradients to update policy network
            self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)

    def storeRollout(self, state, reward):
        self.reward_buffer.append(reward)
        self.state_buffer.append(state[0])

    def train_step(self, steps_count):
        states = np.array(self.state_buffer[-steps_count:]) / self.division_rate
        rewars = self.reward_buffer[-steps_count:]
        _, ls = self.sess.run([self.train_op, self.loss],
                              {self.states: states,
                               self.discounted_rewards: rewars})
        return ls
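The controller's raw RNN outputs are small floats; create_variables multiplies them by division_rate (100.0) and casts them to int32 to obtain integer architecture parameters, and train_step divides the stored states by the same factor before feeding them back. A tiny numpy sketch of that round trip (the raw output values are made up):

import numpy as np

division_rate = 100.0

# hypothetical raw policy-network output for max_layers = 1 (4 values, roughly in [0, 1])
raw_output = np.array([[[0.031, 0.327, 0.012, 0.009]]])

# what predicted_action does: scale and truncate to integers
predicted_action = (raw_output * division_rate).astype(np.int32)
print(predicted_action)    # [[[ 3 32  1  0]]] -> filter_size=3, filters=32, pool=1, dropout=0

# what train_step does before feeding the state back to the controller
state_for_training = predicted_action[0] / division_rate
print(state_for_training)  # [[0.03 0.32 0.01 0.  ]]

This also explains the check in train.py: any scaled output that truncates to zero or below makes the proposed architecture invalid and earns a fixed reward of -1.0.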

Code 2:

Source code: neural-architecture-search-master

Modified code:

train.py

import numpy as np
import csv

import tensorflow as tf
from keras import backend as K
from keras.datasets import cifar10
from keras.utils import to_categorical

from controller import Controller, StateSpace
from manager import NetworkManager
from model import model_fn

tf.compat.v1.disable_eager_execution()
tf.get_logger().setLevel('ERROR')

# create a shared session between Keras and Tensorflow
policy_sess = tf.compat.v1.Session()
tf.compat.v1.keras.backend.set_session(policy_sess)

NUM_LAYERS = 4  # number of layers of the state space
MAX_TRIALS = 250  # maximum number of models generated

MAX_EPOCHS = 10  # maximum number of epochs to train
CHILD_BATCHSIZE = 128  # batchsize of the child models
EXPLORATION = 0.8  # high exploration for the first 1000 steps
REGULARIZATION = 1e-3  # regularization strength
CONTROLLER_CELLS = 32  # number of cells in RNN controller
EMBEDDING_DIM = 20  # dimension of the embeddings for each state
ACCURACY_BETA = 0.8  # beta value for the moving average of the accuracy
CLIP_REWARDS = 0.0  # clip rewards in the [-0.05, 0.05] range
RESTORE_CONTROLLER = True  # restore controller to continue training

# construct a state space
state_space = StateSpace()

# add states
state_space.add_state(name='kernel', values=[1, 3])
state_space.add_state(name='filters', values=[16, 32, 64])

# print the state space being searched
state_space.print_state_space()

# prepare the training data for the NetworkManager
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

dataset = [x_train, y_train, x_test, y_test]  # pack the dataset for the NetworkManager

previous_acc = 0.0
total_reward = 0.0

with policy_sess.as_default():
    # create the Controller and build the internal policy network
    controller = Controller(policy_sess, NUM_LAYERS, state_space,
                            reg_param=REGULARIZATION,
                            exploration=EXPLORATION,
                            controller_cells=CONTROLLER_CELLS,
                            embedding_dim=EMBEDDING_DIM,
                            restore_controller=RESTORE_CONTROLLER)

# create the Network Manager
manager = NetworkManager(dataset, epochs=MAX_EPOCHS, child_batchsize=CHILD_BATCHSIZE, clip_rewards=CLIP_REWARDS,
                         acc_beta=ACCURACY_BETA)

# get an initial random state space if controller needs to predict an
# action from the initial state
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# clear the previous files
controller.remove_files()

# train for number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        tf.compat.v1.keras.backend.set_session(policy_sess)
        actions = controller.get_action(state)  # get an action for the previous state

    # print the action probabilities
    state_space.print_actions(actions)
    print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train and get reward and accuracy from the network manager
    reward, previous_acc = manager.get_rewards(model_fn, state_space.parse_state_space_list(actions))
    print("Rewards : ", reward, "Accuracy : ", previous_acc)

    with policy_sess.as_default():
        tf.compat.v1.keras.backend.set_session(policy_sess)

        total_reward += reward
        print("Total reward : ", total_reward)

        # actions and states are equivalent, save the state and reward
        state = actions
        controller.store_rollout(state, reward)

        # train the controller on the saved state and the discounted rewards
        loss = controller.train_step()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))

        # write the results of this trial into a file
        with open('train_history.csv', mode='a+') as f:
            data = [previous_acc, reward]
            data.extend(state_space.parse_state_space_list(state))
            writer = csv.writer(f)
            writer.writerow(data)
    print()

print("Total Reward : ", total_reward)
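Each trial appends a row of the form [accuracy, reward, kernel_1, filters_1, ..., kernel_4, filters_4] to train_history.csv, so the best architecture found so far can be recovered after (or during) the search. A minimal sketch, assuming the file was produced by the loop above:

import csv

best = None
with open('train_history.csv') as f:
    for row in csv.reader(f):
        if not row:
            continue
        acc = float(row[0])
        arch = [int(float(v)) for v in row[2:]]  # (kernel, filters) pairs, one pair per layer
        if best is None or acc > best[0]:
            best = (acc, arch)

print("best accuracy: %.4f, architecture: %s" % best)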

controller.py

import numpy as np
import time
import pprint
from collections import OrderedDictfrom keras import backend as K
import tensorflow as tfimport osif not os.path.exists('weights/'):os.makedirs('weights/')
tf.compat.v1.disable_eager_execution()class StateSpace:'''State Space managerProvides utilit functions for holding "states" / "actions" that the controllermust use to train and predict.Also provides a more convenient way to define the search space'''def __init__(self):self.states = OrderedDict()self.state_count_ = 0def add_state(self, name, values):'''Adds a "state" to the state manager, along with some metadata for efficientpacking and unpacking of information required by the RNN Controller.Stores metadata such as:-   Global ID-   Name-   Valid Values-   Number of valid values possible-   Map from value ID to state value-   Map from state value to value IDArgs:name: name of the state / actionvalues: valid values that this state can takeReturns:Global ID of the state. Can be used to refer to this state later.'''index_map = {}for i, val in enumerate(values):index_map[i] = valvalue_map = {}for i, val in enumerate(values):value_map[val] = imetadata = {'id': self.state_count_,'name': name,'values': values,'size': len(values),'index_map_': index_map,'value_map_': value_map,}self.states[self.state_count_] = metadataself.state_count_ += 1return self.state_count_ - 1def embedding_encode(self, id, value):'''Embedding index encode the specific state valueArgs:id: global id of the statevalue: state valueReturns:embedding encoded representation of the state value'''state = self[id]size = state['size']value_map = state['value_map_']value_idx = value_map[value]one_hot = np.zeros((1, size), dtype=np.float32)one_hot[np.arange(1), value_idx] = value_idx + 1return one_hotdef get_state_value(self, id, index):'''Retrieves the state value from the state value IDArgs:id: global id of the stateindex: index of the state value (usually from argmax)Returns:The actual state value at given value index'''state = self[id]index_map = state['index_map_']if (type(index) == list or type(index) == np.ndarray) and len(index) == 1:index = index[0]value = index_map[index]return valuedef get_random_state_space(self, num_layers):'''Constructs a random initial state space for feeding as an initial valueto the Controller RNNArgs:num_layers: number of layers to duplicate the search spaceReturns:A list of one hot encoded states'''states = []for id in range(self.size * num_layers):state = self[id]size = state['size']sample = np.random.choice(size, size=1)sample = state['index_map_'][sample[0]]state = self.embedding_encode(id, sample)states.append(state)return statesdef parse_state_space_list(self, state_list):'''Parses a list of one hot encoded states to retrieve a list of state valuesArgs:state_list: list of one hot encoded statesReturns:list of state values'''state_values = []for id, state_one_hot in enumerate(state_list):state_val_idx = np.argmax(state_one_hot, axis=-1)[0]value = self.get_state_value(id, state_val_idx)state_values.append(value)return state_valuesdef print_state_space(self):''' Pretty print the state space '''print('*' * 40, 'STATE SPACE', '*' * 40)pp = pprint.PrettyPrinter(indent=2, width=100)for id, state in self.states.items():pp.pprint(state)print()def print_actions(self, actions):''' Print the action space properly '''print('Actions :')for id, action in enumerate(actions):if id % self.size == 0:print("*" * 20, "Layer %d" % (((id + 1) // self.size) + 1), "*" * 20)state = self[id]name = state['name']vals = [(n, p) for n, p in zip(state['values'], *action)]print("%s : " % name, vals)print()def __getitem__(self, id):return self.states[id % self.size]@propertydef size(self):return self.state_count_class 
Controller:'''Utility class to manage the RNN Controller'''def __init__(self, policy_session, num_layers, state_space,reg_param=0.001,discount_factor=0.99,exploration=0.8,controller_cells=32,embedding_dim=20,clip_norm=0.0,restore_controller=False):self.policy_session = policy_session  # type: tf.Sessionself.num_layers = num_layersself.state_space = state_space  # type: StateSpaceself.state_size = self.state_space.sizeself.controller_cells = controller_cellsself.embedding_dim = embedding_dimself.reg_strength = reg_paramself.discount_factor = discount_factorself.exploration = explorationself.restore_controller = restore_controllerself.clip_norm = clip_normself.reward_buffer = []self.state_buffer = []self.cell_outputs = []self.policy_classifiers = []self.policy_actions = []self.policy_labels = []self.build_policy_network()def get_action(self, state):'''Gets a one hot encoded action list, either from random sampling or fromthe Controller RNNArgs:state: a list of one hot encoded states, whose first value is used as initialstate for the controller RNNReturns:A one hot encoded action list'''if np.random.random() < self.exploration:print("Generating random action to explore")actions = []for i in range(self.state_size * self.num_layers):state_ = self.state_space[i]size = state_['size']sample = np.random.choice(size, size=1)sample = state_['index_map_'][sample[0]]action = self.state_space.embedding_encode(i, sample)actions.append(action)return actionselse:print("Prediction action from Controller")initial_state = self.state_space[0]size = initial_state['size']if state[0].shape != (1, size):state = state[0].reshape((1, size)).astype('int32')else:state = state[0]print("State input to Controller for Action : ", state.flatten())with self.policy_session.as_default():tf.compat.v1.keras.backend.set_session(self.policy_session)with tf.name_scope('action_prediction'):pred_actions = self.policy_session.run(self.policy_actions, feed_dict={self.state_input: state})return pred_actionsdef build_policy_network(self):with self.policy_session.as_default():tf.compat.v1.keras.backend.set_session(self.policy_session)with tf.name_scope('controller'):with tf.compat.v1.variable_scope('policy_network'):# state input is the first input fed into the controller RNN.# the rest of the inputs are fed to the RNN internallywith tf.name_scope('state_input'):state_input = tf.compat.v1.placeholder(dtype=tf.int32, shape=(1, None), name='state_input')self.state_input = state_input# we can use LSTM as the controller as wellnas_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(self.controller_cells)cell_state = nas_cell.zero_state(batch_size=1, dtype=tf.float32)embedding_weights = []# for each possible state, create a new embedding. 
Reuse the weights for multiple layers.with tf.compat.v1.variable_scope('embeddings', reuse=tf.compat.v1.AUTO_REUSE):for i in range(self.state_size):state_ = self.state_space[i]size = state_['size']# size + 1 is used so that 0th index is never updated and is "default" valueweights = tf.compat.v1.get_variable('state_embeddings_%d' % i,shape=[size + 1, self.embedding_dim],initializer=tf.compat.v1.initializers.random_uniform(-1., 1.))embedding_weights.append(weights)# initially, cell input will be 1st state inputembeddings = tf.nn.embedding_lookup(embedding_weights[0], state_input)cell_input = embeddings# we provide a flat list of chained input-output to the RNNfor i in range(self.state_size * self.num_layers):state_id = i % self.state_sizestate_space = self.state_space[i]size = state_space['size']with tf.name_scope('controller_output_%d' % i):# feed the ith layer input (i-1 layer output) to the RNNoutputs, final_state = tf.compat.v1.nn.dynamic_rnn(nas_cell,cell_input,initial_state=cell_state,dtype=tf.float32)# add a new classifier for each layers outputclassifier = tf.compat.v1.layers.dense(outputs[:, -1, :], units=size,name='classifier_%d' % (i),reuse=False)preds = tf.nn.softmax(classifier)# feed the previous layer (i-1 layer output) to the next layers input, along with state# take the class labelcell_input = tf.argmax(preds, axis=-1)cell_input = tf.expand_dims(cell_input, -1, name='pred_output_%d' % (i))cell_input = tf.cast(cell_input, tf.int32)cell_input = tf.add(cell_input,1)  # we avoid using 0 so as to have a "default" embedding at 0th index# embedding lookup of this state using its state weights ; reuse weightscell_input = tf.nn.embedding_lookup(embedding_weights[state_id], cell_input,name='cell_output_%d' % (i))cell_state = final_state# store the tensors for later loss computationself.cell_outputs.append(cell_input)self.policy_classifiers.append(classifier)self.policy_actions.append(preds)policy_net_variables = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,scope='policy_network')with tf.name_scope('optimizer'):self.global_step = tf.Variable(0, trainable=False)starter_learning_rate = 0.1learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate, self.global_step,500, 0.95, staircase=True)tf.summary.scalar('learning_rate', learning_rate)self.optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=learning_rate)with tf.name_scope('losses'):self.discounted_rewards = tf.compat.v1.placeholder(tf.float32, shape=(None,), name='discounted_rewards')tf.summary.scalar('discounted_reward', tf.reduce_sum(self.discounted_rewards))# calculate sum of all the individual classifierscross_entropy_loss = 0for i in range(self.state_size * self.num_layers):classifier = self.policy_classifiers[i]state_space = self.state_space[i]size = state_space['size']with tf.name_scope('state_%d' % (i + 1)):labels = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, size),name='cell_label_%d' % i)self.policy_labels.append(labels)ce_loss = tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(logits=classifier, labels=labels)tf.summary.scalar('state_%d_ce_loss' % (i + 1), tf.reduce_mean(ce_loss))cross_entropy_loss += ce_losspolicy_gradient_loss = tf.reduce_mean(cross_entropy_loss)reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_net_variables])  # Regularization# sum up policy gradient and regularization lossself.total_loss = policy_gradient_loss + self.reg_strength * reg_losstf.summary.scalar('total_loss', self.total_loss)self.gradients = 
self.optimizer.compute_gradients(self.total_loss)with tf.name_scope('policy_gradients'):# normalize gradients so that they dont explode if argument passedif self.clip_norm is not None and self.clip_norm != 0.0:norm = tf.constant(self.clip_norm, dtype=tf.float32)gradients, vars = zip(*self.gradients)  # unpack the two lists of gradients and the variablesgradients, _ = tf.clip_by_global_norm(gradients, norm)  # clip by the normself.gradients = list(zip(gradients, vars))  # we need to set values later, convert to list# compute policy gradientsfor i, (grad, var) in enumerate(self.gradients):if grad is not None:self.gradients[i] = (grad * self.discounted_rewards, var)# training updatewith tf.name_scope("train_policy_network"):# apply gradients to update policy networkself.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)self.summaries_op = tf.compat.v1.summary.merge_all()timestr = time.strftime("%Y-%m-%d-%H-%M-%S")filename = 'logs/%s' % timestrself.summary_writer = tf.compat.v1.summary.FileWriter(filename, graph=self.policy_session.graph)self.policy_session.run(tf.compat.v1.global_variables_initializer())self.saver = tf.compat.v1.train.Saver(max_to_keep=1)if self.restore_controller:path = tf.train.latest_checkpoint('weights/')if path is not None and tf.compat.v1.train.checkpoint_exists(path):print("Loading Controller Checkpoint !")self.saver.restore(self.policy_session, path)def store_rollout(self, state, reward):self.reward_buffer.append(reward)self.state_buffer.append(state)# dump buffers to file if it grows larger than 50 itemsif len(self.reward_buffer) > 20:with open('buffers.txt', mode='a+') as f:for i in range(20):state_ = self.state_buffer[i]state_list = self.state_space.parse_state_space_list(state_)state_list = ','.join(str(v) for v in state_list)f.write("%0.4f,%s\n" % (self.reward_buffer[i], state_list))print("Saved buffers to file `buffers.txt` !")self.reward_buffer = [self.reward_buffer[-1]]self.state_buffer = [self.state_buffer[-1]]def discount_rewards(self):'''Compute discounted rewards over the entire reward bufferReturns:Discounted reward value'''rewards = np.asarray(self.reward_buffer)discounted_rewards = np.zeros_like(rewards)running_add = 0for t in reversed(range(0, rewards.size)):if rewards[t] != 0:running_add = 0running_add = running_add * self.discount_factor + rewards[t]discounted_rewards[t] = running_addreturn discounted_rewards[-1]def train_step(self):'''Perform a single train step on the Controller RNNReturns:the training loss'''states = self.state_buffer[-1]label_list = []# parse the state space to get real value of the states,# then one hot encode them for comparison with the predictionsstate_list = self.state_space.parse_state_space_list(states)for id, state_value in enumerate(state_list):state_one_hot = self.state_space.embedding_encode(id, state_value)label_list.append(state_one_hot)# the initial input to the controller RNNstate_input_size = self.state_space[0]['size']state_input = states[0].reshape((1, state_input_size)).astype('int32')print("State input to Controller for training : ", state_input.flatten())# the discounted reward valuereward = self.discount_rewards()reward = np.asarray([reward]).astype('float32')feed_dict = {self.state_input: state_input,self.discounted_rewards: reward}# prepare the feed dict with the values of all the policy labels for each# of the Controller outputsfor i, label in enumerate(label_list):feed_dict[self.policy_labels[i]] = labelwith 
self.policy_session.as_default():tf.compat.v1.keras.backend.set_session(self.policy_session)print("Training RNN (States ip) : ", state_list)print("Training RNN (Reward ip) : ", reward.flatten())_, loss, global_step = self.policy_session.run([self.train_op, self.total_loss,self.global_step],feed_dict=feed_dict)# self.summary_writer.add_summary(self.summaries_op, global_step)self.saver.save(self.policy_session, save_path='weights/controller.ckpt', global_step=self.global_step)# reduce exploration after many train stepsif global_step != 0 and global_step % 20 == 0 and self.exploration > 0.5:self.exploration *= 0.99return lossdef remove_files(self):files = ['train_history.csv', 'buffers.txt']for file in files:if os.path.exists(file):os.remove(file)

model.py

from keras.models import Model
from keras.layers import Input, Dense, Conv2D, GlobalAveragePooling2D


# generic model design
def model_fn(actions):
    # unpack the actions from the list
    kernel_1, filters_1, kernel_2, filters_2, kernel_3, filters_3, kernel_4, filters_4 = actions

    ip = Input(shape=(32, 32, 3))
    x = Conv2D(filters_1, (kernel_1, kernel_1), strides=(2, 2), padding='same', activation='relu')(ip)
    x = Conv2D(filters_2, (kernel_2, kernel_2), strides=(1, 1), padding='same', activation='relu')(x)
    x = Conv2D(filters_3, (kernel_3, kernel_3), strides=(2, 2), padding='same', activation='relu')(x)
    x = Conv2D(filters_4, (kernel_4, kernel_4), strides=(1, 1), padding='same', activation='relu')(x)
    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation='softmax')(x)

    model = Model(ip, x)
    return model
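For a quick sanity check, model_fn can also be called directly with a hand-written action list (kernel size and filter count alternating, one pair per layer, matching the two states registered in train.py). The values below are hypothetical:

from model import model_fn

# hypothetical parsed actions for NUM_LAYERS = 4: (kernel, filters) per layer
actions = [3, 16, 3, 32, 3, 32, 3, 64]

model = model_fn(actions)
model.summary()  # 4 conv layers -> global average pooling -> 10-way softmax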

manager.py

import numpy as np

from keras.models import Model
from keras import backend as K
from keras.callbacks import ModelCheckpoint
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
tf.get_logger().setLevel('ERROR')


class NetworkManager:
    '''
    Helper class to manage the generation of subnetwork training given a dataset
    '''
    def __init__(self, dataset, epochs=5, child_batchsize=128, acc_beta=0.8, clip_rewards=0.0):
        '''
        Manager which is tasked with creating subnetworks, training them on a dataset, and retrieving
        rewards in terms of accuracy, which is passed to the controller RNN.

        Args:
            dataset: a tuple of 4 arrays (X_train, y_train, X_val, y_val)
            epochs: number of epochs to train the subnetworks
            child_batchsize: batchsize of training the subnetworks
            acc_beta: exponential weight for the accuracy
            clip_rewards: float - to clip rewards in [-range, range] to prevent
                large weight updates. Use when training is highly unstable.
        '''
        self.dataset = dataset
        self.epochs = epochs
        self.batchsize = child_batchsize
        self.clip_rewards = clip_rewards

        self.beta = acc_beta
        self.beta_bias = acc_beta
        self.moving_acc = 0.0

    def get_rewards(self, model_fn, actions):
        '''
        Creates a subnetwork given the actions predicted by the controller RNN,
        trains it on the provided dataset, and then returns a reward.

        Args:
            model_fn: a function which accepts one argument, a list of
                parsed actions, obtained via an inverse mapping from the
                StateSpace.
            actions: a list of parsed actions obtained via an inverse mapping
                from the StateSpace. It is in a specific order as given below:

                Consider 4 states were added to the StateSpace via the `add_state`
                method. Then the `actions` array will be of length 4, with the
                values of those states in the order that they were added.

                If number of layers is greater than one, then the `actions` array
                will be of length `4 * number of layers` (in the above scenario).
                The index from [0:4] will be for layer 0, from [4:8] for layer 1,
                etc for the number of layers.

                These action values are for direct use in the construction of models.

        Returns:
            a reward for training a model with the given actions
        '''
        with tf.compat.v1.Session(graph=tf.Graph()) as network_sess:
            tf.compat.v1.keras.backend.set_session(network_sess)

            # generate a submodel given predicted actions
            model = model_fn(actions)  # type: Model
            model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

            # unpack the dataset
            X_train, y_train, X_val, y_val = self.dataset

            # train the model using Keras methods
            model.fit(X_train, y_train, batch_size=self.batchsize, epochs=self.epochs,
                      verbose=1, validation_data=(X_val, y_val),
                      callbacks=[ModelCheckpoint('weights/temp_network.h5',
                                                 monitor='val_accuracy', verbose=1,
                                                 save_freq="epoch",
                                                 save_best_only=True,
                                                 save_weights_only=True)])

            # load best performance epoch in this training session
            model.load_weights('weights/temp_network.h5')

            # evaluate the model
            loss, acc = model.evaluate(X_val, y_val, batch_size=self.batchsize)

            # compute the reward
            reward = (acc - self.moving_acc)

            # if rewards are clipped, clip them in the range -0.05 to 0.05
            if self.clip_rewards:
                reward = np.clip(reward, -0.05, 0.05)

            # update moving accuracy with bias correction for 1st update
            if 0.0 < self.beta < 1.0:
                self.moving_acc = self.beta * self.moving_acc + (1 - self.beta) * acc
                self.moving_acc = self.moving_acc / (1 - self.beta_bias)
                self.beta_bias = 0

                reward = np.clip(reward, -0.1, 0.1)

            print()
            print("Manager: EWA Accuracy = ", self.moving_acc)

        # clean up resources and GPU memory
        network_sess.close()

        return reward, acc
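The reward returned by get_rewards is the gap between the child model's validation accuracy and an exponentially weighted moving average of past accuracies, with a one-time bias correction on the first update and clipping to [-0.1, 0.1]. A small pure-Python sketch of that update rule with made-up accuracies:

beta = 0.8
beta_bias = beta
moving_acc = 0.0

for acc in [0.55, 0.61, 0.64]:                # hypothetical validation accuracies
    reward = acc - moving_acc                 # positive only if the child beats the running average
    moving_acc = beta * moving_acc + (1 - beta) * acc
    moving_acc = moving_acc / (1 - beta_bias) # bias correction, effective only on the first step
    beta_bias = 0
    reward = max(-0.1, min(0.1, reward))      # same clipping as in the manager
    print("acc=%.2f reward=%.2f moving_acc=%.3f" % (acc, reward, moving_acc))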

nascell.py

from keras.engine import Layer
from keras import activations
from keras import initializers
from keras import regularizers
from keras import constraints
from keras import backend as K
from keras.layers import RNN
from keras.layers.recurrent import _generate_dropout_mask, _generate_dropout_onesimport warnings# import tensorflow as tf
# import tensorflow.contrib.rnn as rnnclass NASCell(Layer):"""Neural Architecture Search (NAS) recurrent network cell.This implements the recurrent cell from the paper:https://arxiv.org/abs/1611.01578Barret Zoph and Quoc V. Le."Neural Architecture Search with Reinforcement Learning" Proc. ICLR 2017.The class uses an optional projection layer.# Argumentsunits: Positive integer, dimensionality of the output space.projection_units: (optional) Positive integer, The output dimensionalityfor the projection matrices.  If None, no projection is performed.activation: Activation function to use(see [activations](../activations.md)).If you pass None, no activation is applied(ie. "linear" activation: `a(x) = x`).recurrent_activation: Activation function to usefor the recurrent step(see [activations](../activations.md)).projection_activation: Activation function to usefor the projection step(see [activations](../activations.md)).use_bias: Boolean, whether the layer uses a bias vector.kernel_initializer: Initializer for the `kernel` weights matrix,used for the linear transformation of the inputs.(see [initializers](../initializers.md)).recurrent_initializer: Initializer for the `recurrent_kernel`weights matrix,used for the linear transformation of the recurrent state.(see [initializers](../initializers.md)).projection_initializer: Initializer for the `projection_kernel`weights matrix,used for the linear transformation of the projection step.(see [initializers](../initializers.md)).bias_initializer: Initializer for the bias vector(see [initializers](../initializers.md)).unit_forget_bias: Boolean.If True, add 1 to the bias of the forget gate at initialization.Setting it to true will also force `bias_initializer="zeros"`.This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)kernel_regularizer: Regularizer function applied tothe `kernel` weights matrix(see [regularizer](../regularizers.md)).recurrent_regularizer: Regularizer function applied tothe `recurrent_kernel` weights matrix(see [regularizer](../regularizers.md)).projection_regularizer: Regularizer function applied tothe `projection_kernel` weights matrix(see [regularizer](../regularizers.md)).bias_regularizer: Regularizer function applied to the bias vector(see [regularizer](../regularizers.md)).kernel_constraint: Constraint function applied tothe `kernel` weights matrix(see [constraints](../constraints.md)).recurrent_constraint: Constraint function applied tothe `recurrent_kernel` weights matrix(see [constraints](../constraints.md)).projection_constraint: Constraint function applied tothe `projection_kernel` weights matrix(see [constraints](../constraints.md)).bias_constraint: Constraint function applied to the bias vector(see [constraints](../constraints.md)).dropout: Float between 0 and 1.Fraction of the units to drop forthe linear transformation of the inputs.recurrent_dropout: Float between 0 and 1.Fraction of the units to drop forthe linear transformation of the recurrent state.implementation: Implementation mode, either 1 or 2.Mode 1 will structure its operations as a larger number ofsmaller dot products and additions, whereas mode 2 willbatch them into fewer, larger operations. 
These modes willhave different performance profiles on different hardware andfor different applications."""def __init__(self, units,projection_units=None,activation='tanh',recurrent_activation='sigmoid',projection_activation='linear',use_bias=True,kernel_initializer='glorot_uniform',recurrent_initializer='orthogonal',projection_initializer='glorot_uniform',bias_initializer='zeros',unit_forget_bias=False,kernel_regularizer=None,recurrent_regularizer=None,projection_regularizer=None,bias_regularizer=None,kernel_constraint=None,recurrent_constraint=None,projection_constraint=None,bias_constraint=None,dropout=0.,recurrent_dropout=0.,implementation=2,**kwargs):super(NASCell, self).__init__(**kwargs)self.units = unitsself.projection_units = projection_unitsself.activation = activations.get(activation)self.recurrent_activation = activations.get(recurrent_activation)self.projection_activation = activations.get(projection_activation)self.cell_activation = activations.get('relu')self.use_bias = use_biasself.kernel_initializer = initializers.get(kernel_initializer)self.recurrent_initializer = initializers.get(recurrent_initializer)self.projection_initializer = initializers.get(projection_initializer)self.bias_initializer = initializers.get(bias_initializer)self.unit_forget_bias = unit_forget_biasself.kernel_regularizer = regularizers.get(kernel_regularizer)self.recurrent_regularizer = regularizers.get(recurrent_regularizer)self.projection_regularizer = regularizers.get(projection_regularizer)self.bias_regularizer = regularizers.get(bias_regularizer)self.kernel_constraint = constraints.get(kernel_constraint)self.recurrent_constraint = constraints.get(recurrent_constraint)self.projection_constraint = constraints.get(projection_constraint)self.bias_constraint = constraints.get(bias_constraint)self.dropout = min(1., max(0., dropout))self.recurrent_dropout = min(1., max(0., recurrent_dropout))self.implementation = implementationif self.projection_units is not None:self.state_size = (self.projection_units, self.units)else:self.state_size = (self.units, self.units)self._dropout_mask = Noneself._recurrent_dropout_mask = Nonedef build(self, input_shape):input_dim = input_shape[-1]if self.projection_units is not None:recurrent_output_dim = self.projection_unitselse:recurrent_output_dim = self.unitsself.kernel = self.add_weight(shape=(input_dim, self.units * 8),name='kernel',initializer=self.kernel_initializer,regularizer=self.kernel_regularizer,constraint=self.kernel_constraint)self.recurrent_kernel = self.add_weight(shape=(recurrent_output_dim, self.units * 8),name='recurrent_kernel',initializer=self.recurrent_initializer,regularizer=self.recurrent_regularizer,constraint=self.recurrent_constraint)if self.projection_units is not None:self.projection_kernel = self.add_weight(shape=(self.units, self.projection_units),name='projection_kernel',initializer=self.projection_initializer,regularizer=self.projection_regularizer,constraint=self.projection_constraint)if self.use_bias:if self.unit_forget_bias:def bias_initializer(shape, *args, **kwargs):return K.concatenate([self.bias_initializer((self.units,), *args, **kwargs),initializers.Ones()((self.units,), *args, **kwargs),self.bias_initializer((self.units * 6,), *args, **kwargs),])else:bias_initializer = self.bias_initializerself.bias = self.add_weight(shape=(self.units * 8,),name='bias',initializer=bias_initializer,regularizer=self.bias_regularizer,constraint=self.bias_constraint)else:self.bias = Noneself.kernel_0 = self.kernel[:, :self.units]self.kernel_1 = 
self.kernel[:, self.units: self.units * 2]self.kernel_2 = self.kernel[:, self.units * 2: self.units * 3]self.kernel_3 = self.kernel[:, self.units * 3: self.units * 4]self.kernel_4 = self.kernel[:, self.units * 4: self.units * 5]self.kernel_5 = self.kernel[:, self.units * 5: self.units * 6]self.kernel_6 = self.kernel[:, self.units * 6: self.units * 7]self.kernel_7 = self.kernel[:, self.units * 7:]self.recurrent_kernel_0 = self.recurrent_kernel[:, :self.units]self.recurrent_kernel_1 = self.recurrent_kernel[:, self.units: self.units * 2]self.recurrent_kernel_2 = self.recurrent_kernel[:, self.units * 2: self.units * 3]self.recurrent_kernel_3 = self.recurrent_kernel[:, self.units * 3: self.units * 4]self.recurrent_kernel_4 = self.recurrent_kernel[:, self.units * 4: self.units * 5]self.recurrent_kernel_5 = self.recurrent_kernel[:, self.units * 5: self.units * 6]self.recurrent_kernel_6 = self.recurrent_kernel[:, self.units * 6: self.units * 7]self.recurrent_kernel_7 = self.recurrent_kernel[:, self.units * 7:]if self.use_bias:self.bias_0 = self.bias[:self.units]self.bias_1 = self.bias[self.units: self.units * 2]self.bias_2 = self.bias[self.units * 2: self.units * 3]self.bias_3 = self.bias[self.units * 3: self.units * 4]self.bias_4 = self.bias[self.units * 4: self.units * 5]self.bias_5 = self.bias[self.units * 5: self.units * 6]self.bias_6 = self.bias[self.units * 6: self.units * 7]self.bias_7 = self.bias[self.units * 7:]else:self.bias_0 = Noneself.bias_1 = Noneself.bias_2 = Noneself.bias_3 = Noneself.bias_4 = Noneself.bias_5 = Noneself.bias_6 = Noneself.bias_7 = Noneself.built = Truedef call(self, inputs, states, training=None):if 0 < self.dropout < 1 and self._dropout_mask is None:self._dropout_mask = _generate_dropout_mask(_generate_dropout_ones(inputs, K.shape(inputs)[-1]),self.dropout,training=training,count=8)if (0 < self.recurrent_dropout < 1 andself._recurrent_dropout_mask is None):_recurrent_dropout_mask = _generate_dropout_mask(_generate_dropout_ones(inputs, self.units),self.recurrent_dropout,training=training,count=8)self._recurrent_dropout_mask = _recurrent_dropout_mask# dropout matrices for input unitsdp_mask = self._dropout_mask# dropout matrices for recurrent unitsrec_dp_mask = self._recurrent_dropout_maskh_tm1 = states[0]  # previous memory statec_tm1 = states[1]  # previous carry stateif self.implementation == 1:if 0 < self.dropout < 1.:inputs_0 = inputs * dp_mask[0]inputs_1 = inputs * dp_mask[1]inputs_2 = inputs * dp_mask[2]inputs_3 = inputs * dp_mask[3]inputs_4 = inputs * dp_mask[4]inputs_5 = inputs * dp_mask[5]inputs_6 = inputs * dp_mask[6]inputs_7 = inputs * dp_mask[7]else:inputs_0 = inputsinputs_1 = inputsinputs_2 = inputsinputs_3 = inputsinputs_4 = inputsinputs_5 = inputsinputs_6 = inputsinputs_7 = inputsx_0 = K.dot(inputs_0, self.kernel_0)x_1 = K.dot(inputs_1, self.kernel_1)x_2 = K.dot(inputs_2, self.kernel_2)x_3 = K.dot(inputs_3, self.kernel_3)x_4 = K.dot(inputs_4, self.kernel_4)x_5 = K.dot(inputs_5, self.kernel_5)x_6 = K.dot(inputs_6, self.kernel_6)x_7 = K.dot(inputs_7, self.kernel_7)if self.use_bias:x_0 = K.bias_add(x_0, self.bias_0)x_1 = K.bias_add(x_1, self.bias_1)x_2 = K.bias_add(x_2, self.bias_2)x_3 = K.bias_add(x_3, self.bias_3)x_4 = K.bias_add(x_4, self.bias_4)x_5 = K.bias_add(x_5, self.bias_5)x_6 = K.bias_add(x_6, self.bias_6)x_7 = K.bias_add(x_7, self.bias_7)if 0 < self.recurrent_dropout < 1.:h_tm1_0 = h_tm1 * rec_dp_mask[0]h_tm1_1 = h_tm1 * rec_dp_mask[1]h_tm1_2 = h_tm1 * rec_dp_mask[2]h_tm1_3 = h_tm1 * rec_dp_mask[3]h_tm1_4 = h_tm1 * rec_dp_mask[4]h_tm1_5 = 
h_tm1 * rec_dp_mask[5]h_tm1_6 = h_tm1 * rec_dp_mask[6]h_tm1_7 = h_tm1 * rec_dp_mask[7]else:h_tm1_0 = h_tm1h_tm1_1 = h_tm1h_tm1_2 = h_tm1h_tm1_3 = h_tm1h_tm1_4 = h_tm1h_tm1_5 = h_tm1h_tm1_6 = h_tm1h_tm1_7 = h_tm1# First Layerlayer1_0 = self.recurrent_activation(x_0 + K.dot(h_tm1_0, self.recurrent_kernel_0))layer1_1 = self.cell_activation(x_1 + K.dot(h_tm1_1, self.recurrent_kernel_1))layer1_2 = self.recurrent_activation(x_2 + K.dot(h_tm1_2, self.recurrent_kernel_2))layer1_3 = self.cell_activation(x_3 * K.dot(h_tm1_3, self.recurrent_kernel_3))layer1_4 = self.activation(x_4 + K.dot(h_tm1_4, self.recurrent_kernel_4))layer1_5 = self.recurrent_activation(x_5 + K.dot(h_tm1_5, self.recurrent_kernel_5))layer1_6 = self.activation(x_6 + K.dot(h_tm1_6, self.recurrent_kernel_6))layer1_7 = self.recurrent_activation(x_7 + K.dot(h_tm1_7, self.recurrent_kernel_7))# Second Layerlayer2_0 = self.activation(layer1_0 * layer1_1)layer2_1 = self.activation(layer1_2 + layer1_3)layer2_2 = self.activation(layer1_4 * layer1_5)layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)# Inject the Celllayer2_0 = self.activation(layer2_0 + c_tm1)# Third Layerlayer3_0_pre = layer2_0 * layer2_1c = layer3_0_pre  # create a new celllayer3_0 = layer3_0_prelayer3_1 = self.activation(layer2_2 + layer2_3)# Final Layerh = self.activation(layer3_0 * layer3_1)if self.projection_units is not None:h = self.projection_activation(K.dot(h, self.projection_kernel))else:if 0. < self.dropout < 1.:inputs *= dp_mask[0]z = K.dot(inputs, self.kernel)if 0. < self.recurrent_dropout < 1.:h_tm1 *= rec_dp_mask[0]zr = K.dot(h_tm1, self.recurrent_kernel)if self.use_bias:zr = K.bias_add(zr, self.bias)z0 = z[:, :self.units]z1 = z[:, self.units: 2 * self.units]z2 = z[:, 2 * self.units: 3 * self.units]z3 = z[:, 3 * self.units: 4 * self.units]z4 = z[:, 4 * self.units: 5 * self.units]z5 = z[:, 5 * self.units: 6 * self.units]z6 = z[:, 6 * self.units: 7 * self.units]z7 = z[:, 7 * self.units:]zr0 = zr[:, :self.units]zr1 = zr[:, self.units: 2 * self.units]zr2 = zr[:, 2 * self.units: 3 * self.units]zr3 = zr[:, 3 * self.units: 4 * self.units]zr4 = zr[:, 4 * self.units: 5 * self.units]zr5 = zr[:, 5 * self.units: 6 * self.units]zr6 = zr[:, 6 * self.units: 7 * self.units]zr7 = zr[:, 7 * self.units:]# First Layerlayer1_0 = self.recurrent_activation(z0 + zr0)layer1_1 = self.cell_activation(z1 + zr1)layer1_2 = self.recurrent_activation(z2 + zr2)layer1_3 = self.cell_activation(z3 * zr3)layer1_4 = self.activation(z4 + zr4)layer1_5 = self.recurrent_activation(z5 + zr5)layer1_6 = self.activation(z6 + zr6)layer1_7 = self.recurrent_activation(z7 + zr7)# Second Layerlayer2_0 = self.activation(layer1_0 * layer1_1)layer2_1 = self.activation(layer1_2 + layer1_3)layer2_2 = self.activation(layer1_4 * layer1_5)layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)# Inject the Celllayer2_0 = self.activation(layer2_0 + c_tm1)# Third Layerlayer3_0_pre = layer2_0 * layer2_1c = layer3_0_prelayer3_0 = layer3_0_prelayer3_1 = self.activation(layer2_2 + layer2_3)# Final Layerh = self.activation(layer3_0 * layer3_1)if self.projection_units is not None:h = self.projection_activation(K.dot(h, self.projection_kernel))if 0 < self.dropout + self.recurrent_dropout:if training is None:h._uses_learning_phase = Truereturn h, [h, c]def get_config(self):config = {'units': self.units,'projection_units': self.projection_units,'activation': activations.serialize(self.activation),'recurrent_activation': activations.serialize(self.recurrent_activation),'projection_activation': 
activations.serialize(self.projection_activation),'use_bias': self.use_bias,'kernel_initializer': initializers.serialize(self.kernel_initializer),'recurrent_initializer': initializers.serialize(self.recurrent_initializer),'projection_initializer': initializers.serialize(self.projection_initializer),'bias_initializer': initializers.serialize(self.bias_initializer),'unit_forget_bias': self.unit_forget_bias,'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),'projection_regularizer': regularizers.serialize(self.projection_regularizer),'bias_regularizer': regularizers.serialize(self.bias_regularizer),'kernel_constraint': constraints.serialize(self.kernel_constraint),'recurrent_constraint': constraints.serialize(self.recurrent_constraint),'projection_constraint': constraints.serialize(self.projection_constraint),'bias_constraint': constraints.serialize(self.bias_constraint),'dropout': self.dropout,'recurrent_dropout': self.recurrent_dropout,'implementation': self.implementation}base_config = super(NASCell, self).get_config()return dict(list(base_config.items()) + list(config.items()))class NASRNN(RNN):"""Neural Architecture Search (NAS) recurrent network cell.This implements the recurrent cell from the paper:https://arxiv.org/abs/1611.01578Barret Zoph and Quoc V. Le."Neural Architecture Search with Reinforcement Learning" Proc. ICLR 2017.The class uses an optional projection layer.# Argumentsunits: Positive integer, dimensionality of the output space.projection_units: (optional) Positive integer, The output dimensionalityfor the projection matrices.  If None, no projection is performed.activation: Activation function to use(see [activations](../activations.md)).If you pass None, no activation is applied(ie. 
"linear" activation: `a(x) = x`).recurrent_activation: Activation function to usefor the recurrent step(see [activations](../activations.md)).projection_activation: Activation function to usefor the projection step(see [activations](../activations.md)).use_bias: Boolean, whether the layer uses a bias vector.kernel_initializer: Initializer for the `kernel` weights matrix,used for the linear transformation of the inputs.(see [initializers](../initializers.md)).recurrent_initializer: Initializer for the `recurrent_kernel`weights matrix,used for the linear transformation of the recurrent state.(see [initializers](../initializers.md)).projection_initializer: Initializer for the `projection_kernel`weights matrix,used for the linear transformation of the projection step.(see [initializers](../initializers.md)).bias_initializer: Initializer for the bias vector(see [initializers](../initializers.md)).unit_forget_bias: Boolean.If True, add 1 to the bias of the forget gate at initialization.Setting it to true will also force `bias_initializer="zeros"`.This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)kernel_regularizer: Regularizer function applied tothe `kernel` weights matrix(see [regularizer](../regularizers.md)).recurrent_regularizer: Regularizer function applied tothe `recurrent_kernel` weights matrix(see [regularizer](../regularizers.md)).projection_regularizer: Regularizer function applied tothe `projection_kernel` weights matrix(see [regularizer](../regularizers.md)).bias_regularizer: Regularizer function applied to the bias vector(see [regularizer](../regularizers.md)).kernel_constraint: Constraint function applied tothe `kernel` weights matrix(see [constraints](../constraints.md)).recurrent_constraint: Constraint function applied tothe `recurrent_kernel` weights matrix(see [constraints](../constraints.md)).projection_constraint: Constraint function applied tothe `projection_kernel` weights matrix(see [constraints](../constraints.md)).bias_constraint: Constraint function applied to the bias vector(see [constraints](../constraints.md)).dropout: Float between 0 and 1.Fraction of the units to drop forthe linear transformation of the inputs.recurrent_dropout: Float between 0 and 1.Fraction of the units to drop forthe linear transformation of the recurrent state.implementation: Implementation mode, either 1 or 2.Mode 1 will structure its operations as a larger number ofsmaller dot products and additions, whereas mode 2 willbatch them into fewer, larger operations. These modes willhave different performance profiles on different hardware andfor different applications.return_sequences: Boolean. Whether to return the last output.in the output sequence, or the full sequence.return_state: Boolean. Whether to return the last statein addition to the output.go_backwards: Boolean (default False).If True, process the input sequence backwards and return thereversed sequence.stateful: Boolean (default False). 
If True, the last statefor each sample at index i in a batch will be used as initialstate for the sample of index i in the following batch.unroll: Boolean (default False).If True, the network will be unrolled,else a symbolic loop will be used.Unrolling can speed-up a RNN,although it tends to be more memory-intensive.Unrolling is only suitable for short sequences.# References- [Long short-term memory](http://www.bioinf.jku.at/publications/older/2604.pdf) (original 1997 paper)- [Learning to forget: Continual prediction with NestedLSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015)- [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)- [Nested LSTMs](https://arxiv.org/abs/1801.10308)"""def __init__(self, units,projection_units=None,activation='tanh',recurrent_activation='sigmoid',projection_activation='linear',use_bias=True,kernel_initializer='glorot_uniform',recurrent_initializer='orthogonal',projection_initializer='glorot_uniform',bias_initializer='zeros',unit_forget_bias=False,kernel_regularizer=None,recurrent_regularizer=None,projection_regularizer=None,bias_regularizer=None,activity_regularizer=None,kernel_constraint=None,recurrent_constraint=None,projection_constraint=None,bias_constraint=None,dropout=0.,recurrent_dropout=0.,implementation=2,return_sequences=False,return_state=False,go_backwards=False,stateful=False,unroll=False,**kwargs):if implementation == 0:warnings.warn('`implementation=0` has been deprecated, ''and now defaults to `implementation=2`.''Please update your layer call.')if K.backend() == 'theano':warnings.warn('RNN dropout is no longer supported with the Theano backend ''due to technical limitations. 
''You can either set `dropout` and `recurrent_dropout` to 0, ''or use the TensorFlow backend.')dropout = 0.recurrent_dropout = 0.cell = NASCell(units, projection_units,activation=activation,recurrent_activation=recurrent_activation,projection_activation=projection_activation,use_bias=use_bias,kernel_initializer=kernel_initializer,recurrent_initializer=recurrent_initializer,projection_initializer=projection_initializer,unit_forget_bias=unit_forget_bias,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,recurrent_regularizer=recurrent_regularizer,bias_regularizer=bias_regularizer,projection_regularizer=projection_regularizer,kernel_constraint=kernel_constraint,recurrent_constraint=recurrent_constraint,bias_constraint=bias_constraint,projection_constraint=projection_constraint,dropout=dropout,recurrent_dropout=recurrent_dropout,implementation=implementation)super(NASRNN, self).__init__(cell,return_sequences=return_sequences,return_state=return_state,go_backwards=go_backwards,stateful=stateful,unroll=unroll,**kwargs)self.activity_regularizer = regularizers.get(activity_regularizer)def call(self, inputs, mask=None, training=None, initial_state=None, constants=None):self.cell._dropout_mask = Noneself.cell._recurrent_dropout_mask = Nonereturn super(NASRNN, self).call(inputs,mask=mask,training=training,initial_state=initial_state,constants=constants)@propertydef units(self):return self.cell.units@propertydef projection_units(self):return self.cell.projection_units@propertydef activation(self):return self.cell.activation@propertydef recurrent_activation(self):return self.cell.recurrent_activation@propertydef projection_activation(self):return self.cell.projection_activation@propertydef use_bias(self):return self.cell.use_bias@propertydef kernel_initializer(self):return self.cell.kernel_initializer@propertydef recurrent_initializer(self):return self.cell.recurrent_initializer@propertydef bias_initializer(self):return self.cell.bias_initializer@propertydef projection_initializer(self):return self.cell.projection_initializer@propertydef unit_forget_bias(self):return self.cell.unit_forget_bias@propertydef kernel_regularizer(self):return self.cell.kernel_regularizer@propertydef recurrent_regularizer(self):return self.cell.recurrent_regularizer@propertydef bias_regularizer(self):return self.cell.bias_regularizer@propertydef projection_regularizer(self):return self.cell.projection_regularizer@propertydef kernel_constraint(self):return self.cell.kernel_constraint@propertydef recurrent_constraint(self):return self.cell.recurrent_constraint@propertydef bias_constraint(self):return self.cell.bias_constraint@propertydef projection_constraint(self):return self.cell.projection_constraint@propertydef dropout(self):return self.cell.dropout@propertydef recurrent_dropout(self):return self.cell.recurrent_dropout@propertydef implementation(self):return self.cell.implementationdef get_config(self):config = {'units': self.units,'projection_units': self.projection_units,'activation': activations.serialize(self.activation),'recurrent_activation': activations.serialize(self.recurrent_activation),'projection_activation': activations.serialize(self.projection_activation),'use_bias': self.use_bias,'kernel_initializer': initializers.serialize(self.kernel_initializer),'recurrent_initializer': initializers.serialize(self.recurrent_initializer),'bias_initializer': initializers.serialize(self.bias_initializer),'projection_initializer': initializers.serialize(self.projection_initializer),'unit_forget_bias': 
self.unit_forget_bias,'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),'bias_regularizer': regularizers.serialize(self.bias_regularizer),'projection_regularizer': regularizers.serialize(self.projection_regularizer),'activity_regularizer': regularizers.serialize(self.activity_regularizer),'kernel_constraint': constraints.serialize(self.kernel_constraint),'recurrent_constraint': constraints.serialize(self.recurrent_constraint),'bias_constraint': constraints.serialize(self.bias_constraint),'projection_constraint': constraints.serialize(self.projection_constraint),'dropout': self.dropout,'recurrent_dropout': self.recurrent_dropout,'implementation': self.implementation}base_config = super(NASRNN, self).get_config()del base_config['cell']return dict(list(base_config.items()) + list(config.items()))@classmethoddef from_config(cls, config):if 'implementation' in config and config['implementation'] == 0:config['implementation'] = 2return cls(**config)

That concludes this article on NAS with RL (Using TensorFlow). We hope the material is helpful to fellow programmers!



http://www.chinasem.cn/article/642475
