本文主要是介绍[ Sequence Models:W1A3 ] Improvise a Jazz Solo with an LSTM Network,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
目录
- Packages
- 1. Problem Statement
- 1.1 Dataset
- 1.2 Model Overview
- 2. Building the Model
- 3. Generating Music
- 3.1 Predicting & Sampling
- 3.2 Generate Music
Packages
import IPython
import sys
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Project helper modules. The wildcard imports are kept in their original
# order because later star-imports can shadow names from earlier ones.
from music21 import *
from grammar import *
from qa import *
from preprocess import *
from music_utils import *
from data_utils import *
from outputs import *
from test_utils import *

from tensorflow.keras.layers import Dense, Activation, Dropout, Input, LSTM, Reshape, Lambda, RepeatVector
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
1. Problem Statement
1.1 Dataset
# Audio widget for a snippet of the training audio.
IPython.display.Audio('./data/30s_seq.wav')

# Load the training data from the MIDI file.
# NOTE(review): load_music_utils is defined in a helper module not shown here;
# the meaning of each returned value is taken from the print labels below.
X, Y, n_values, indices_values, chords = load_music_utils('data/original_metheny.mid')
print('number of training examples:', X.shape[0])
print('Tx (length of sequence):', X.shape[1])
print('total # of unique values:', n_values)
print('shape of X:', X.shape)
print('Shape of Y:', Y.shape)
print('Number of chords', len(chords))
1.2 Model Overview
2. Building the Model
# Number of dimensions for the hidden state of each LSTM cell.
n_a = 64

n_values = 90  # number of music values

# Layer objects are created once at module level so that the same instances
# (and therefore the same weights) are reused at every time step.
reshaper = Reshape((1, n_values))  # Used in Step 2.B of djmodel(), below
LSTM_cell = LSTM(n_a, return_state=True)  # Used in Step 2.C
densor = Dense(n_values, activation='softmax')  # Used in Step 2.D


def djmodel(Tx, LSTM_cell, densor, reshaper):
    """
    Implement the djmodel composed of Tx LSTM cells where each cell is responsible
    for learning the following note based on the previous note and context.
    Each cell has the following schema:
        [X_{t}, a_{t-1}, c0_{t-1}] -> RESHAPE() -> LSTM() -> DENSE()

    Arguments:
        Tx -- length of the sequences in the corpus
        LSTM_cell -- LSTM layer instance
        densor -- Dense layer instance
        reshaper -- Reshape layer instance

    Returns:
        model -- a keras instance model with inputs [X, a0, c0]
    """
    # Get the shape of input values
    n_values = densor.units

    # Get the number of the hidden state vector
    n_a = LSTM_cell.units

    # Define the input layer and specify the shape
    X = Input(shape=(Tx, n_values))

    # Define the initial hidden state a0 and initial cell state c0
    # using `Input`
    a0 = Input(shape=(n_a,), name='a0')
    c0 = Input(shape=(n_a,), name='c0')
    a = a0
    c = c0

    # Step 1: Create empty list to append the outputs while you iterate
    outputs = []

    # Step 2: Loop over Tx
    for t in range(Tx):
        # Step 2.A: select the "t"th time step vector from X.
        x = X[:, t, :]
        # Step 2.B: Use reshaper to reshape x to be (1, n_values)
        x = reshaper(x)
        # Step 2.C: Perform one step of the LSTM_cell; the returned states
        # are carried over to the next time step.
        _, a, c = LSTM_cell(x, initial_state=[a, c])
        # Step 2.D: Apply densor to the hidden state output of LSTM_cell
        out = densor(a)
        # Step 2.E: add the output to "outputs"
        outputs.append(out)

    # Step 3: Create model instance
    model = Model(inputs=[X, a0, c0], outputs=outputs)

    return model


model = djmodel(Tx=30, LSTM_cell=LSTM_cell, densor=densor, reshaper=reshaper)
# `learning_rate` is the current keyword; `lr` is a deprecated alias.
# NOTE(review): `decay` is a legacy-optimizer argument; recent Keras releases
# may reject it -- confirm against the installed TensorFlow version.
opt = Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, decay=0.01)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# The initial hidden/cell states need one row per training example, so the
# batch dimension is taken from X rather than hard-coded.
m = X.shape[0]
a0 = np.zeros((m, n_a))
c0 = np.zeros((m, n_a))

# The model has one output per time step, so Y is passed as a list of targets.
history = model.fit([X, a0, c0], list(Y), epochs=100, verbose=0)
print(f"loss at epoch 1: {history.history['loss'][0]}")
print(f"loss at epoch 100: {history.history['loss'][99]}")
plt.plot(history.history['loss'])
3. Generating Music
3.1 Predicting & Sampling
def music_inference_model(LSTM_cell, densor, Ty=100):
    """
    Uses the trained "LSTM_cell" and "densor" from model() to generate a sequence of values.

    Arguments:
        LSTM_cell -- the trained "LSTM_cell" from model(), Keras layer object
        densor -- the trained "densor" from model(), Keras layer object
        Ty -- integer, number of time steps to generate

    Returns:
        inference_model -- Keras model instance
    """
    # Get the shape of input values
    n_values = densor.units
    # Get the number of the hidden state vector
    n_a = LSTM_cell.units

    # Define the input of your model with a shape
    x0 = Input(shape=(1, n_values))

    # Define s0, initial hidden state for the decoder LSTM
    a0 = Input(shape=(n_a,), name='a0')
    c0 = Input(shape=(n_a,), name='c0')
    a = a0
    c = c0
    x = x0

    # Step 1: Create an empty list of "outputs" to later store your predicted values
    outputs = []

    # Step 2: Loop over Ty and generate a value at every time step
    for t in range(Ty):
        # Step 2.A: Perform one step of LSTM_cell. Use "x", not "x0"
        _, a, c = LSTM_cell(x, initial_state=[a, c])

        # Step 2.B: Apply Dense layer to the hidden state output of the LSTM_cell
        out = densor(a)

        # Step 2.C: Append the prediction "out" to "outputs". out.shape = (None, 90)
        outputs.append(out)

        # Step 2.D: Select the next value according to "out" and set "x" to be
        # the one-hot representation of the selected value.
        x = tf.math.argmax(out, axis=-1)
        x = tf.one_hot(x, depth=n_values)

        # Step 2.E: Use RepeatVector(1) to convert x into a tensor with shape=(None, 1, 90)
        x = RepeatVector(1)(x)

    # Step 3: Create model instance with the correct "inputs" and "outputs"
    inference_model = Model(inputs=[x0, a0, c0], outputs=outputs)

    return inference_model


inference_model = music_inference_model(LSTM_cell, densor, Ty=50)

x_initializer = np.zeros((1, 1, n_values))
a_initializer = np.zeros((1, n_a))
c_initializer = np.zeros((1, n_a))


def predict_and_sample(inference_model, x_initializer=x_initializer,
                       a_initializer=a_initializer, c_initializer=c_initializer):
    """
    Predicts the next value of values using the inference model.

    Arguments:
        inference_model -- Keras model instance for inference time
        x_initializer -- numpy array of shape (1, 1, 90), one-hot vector initializing the values generation
        a_initializer -- numpy array of shape (1, n_a), initializing the hidden state of the LSTM_cell
        c_initializer -- numpy array of shape (1, n_a), initializing the cell state of the LSTM_cell

    Returns:
        results -- numpy-array of shape (Ty, 90), matrix of one-hot vectors representing the values generated
        indices -- numpy-array of shape (Ty, 1), matrix of indices representing the values generated
    """
    # The number of classes is read from the last axis of the one-hot input.
    n_values = x_initializer.shape[2]

    # Step 1: Use your inference model to predict an output sequence given
    # x_initializer, a_initializer and c_initializer.
    pred = inference_model.predict([x_initializer, a_initializer, c_initializer])

    # Step 2: Convert "pred" into an np.array() of indices with the maximum probabilities
    indices = np.argmax(pred, axis=2)

    # Step 3: Convert indices to one-hot vectors, the shape of the results
    # should be (Ty, n_values)
    results = to_categorical(indices, num_classes=n_values)

    return results, indices


results, indices = predict_and_sample(inference_model, x_initializer, a_initializer, c_initializer)

print("np.argmax(results[12]) =", np.argmax(results[12]))
print("np.argmax(results[17]) =", np.argmax(results[17]))
print("list(indices[12:18]) =", list(indices[12:18]))
3.2 Generate Music
# NOTE(review): generate_music and mid2wav come from helper modules not shown
# here; presumably they write 'output/my_music.midi' and render it to
# './output/rendered.wav' -- confirm against those helpers.
out_stream = generate_music(inference_model, indices_values, chords)

mid2wav('output/my_music.midi')
IPython.display.Audio('./output/rendered.wav')
这篇关于[ Sequence Models:W1A3 ] Improvise a Jazz Solo with an LSTM Network的文章就介绍到这儿,希望我们推荐的文章对程序员们有所帮助!