Learning melody and rhythm at the same time
But first, we'll borrow a piece from fastai v1 that I couldn't find in fastai2: the Linear Decoder.
And finally, the model:
# test
from fastai2.text.data import make_vocab
from testing import test_eq, path
from neuralmusic.midi import parse_midi_file, row_to_triplets
from neuralmusic.data.preprocessing import preprocess

# Parse a single MIDI file and derive the (pitch, duration) frequency counts.
raw = parse_midi_file(path("data/ff4-airship.mid"))
processed, pitch_freqs, duration_freqs = preprocess(raw)
triplets = row_to_triplets(processed, 0)

# A one-song "batch": prime the model with the first ten triplets.
bs, seq_len = 1, 10
prompt = triplets[:seq_len]

# Vocabularies keep every symbol seen at least once (min_freq=1).
pitch_vocab = make_vocab(pitch_freqs, min_freq=1)
duration_vocab = make_vocab(duration_freqs, min_freq=1)

model = TheModel(
    pitch_len=len(pitch_vocab),
    duration_len=len(duration_vocab),
    kind="dual",
    emb_size=1000,
    rnn_size=1200,
    rnn_layers=2,
)

# One forward pass; each head should emit (batch, seq, vocab_size) logits.
pitch_out, duration_out = model(triplets_to_input(prompt, pitch_vocab, duration_vocab))
test_eq(torch.Size([bs, seq_len, len(pitch_vocab)]), pitch_out.shape)
test_eq(torch.Size([bs, seq_len, len(duration_vocab)]), duration_out.shape)
model
Prediction
To predict notes from a prompt (a sequence of triplets to prime the model), we'll need a couple more functions.
# test
# Generate five notes on CPU, sampling greedily (top_k=1) from the primed model.
notes = predict(
    torch.device("cpu"), model, prompt, pitch_vocab, duration_vocab, top_k=1, n_notes=5
)
first_pitch, first_duration = notes[0]
# Show the first predicted (pitch, duration) pair and its vocabulary indices.
(
    first_pitch,
    first_duration,
    pitch_vocab.index(first_pitch),
    duration_vocab.index(first_duration),
)