File size: 5,388 Bytes
1348175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
'''
Outline:
- Create animation: animate charts (potentially using streamlit)
'''
import librosa
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import keras
import tensorflow
import matplotlib.animation as animation

model_path = "model_simple.sav" #Defines the path to the model file

# Integer label for each emotion class, copied verbatim from the data
# preprocessing stage — the model's output index i means the emotion whose
# value here is i. 'Saddness' is misspelled but must stay byte-identical to
# the label used at training time; do not "fix" it here.
emotion_map = {
        'Disgust': 0,
        'Happiness': 1,
        'Saddness': 2,
        'Neutral': 3,
        'Fear': 4,
        'Anger': 5,
        'Surprise': 6
    } #Maps emotions to integers: taken from data preprocessing

# Inverse lookup: predicted class index -> human-readable emotion name.
reversed_emotion_map = {value:key for key, value in emotion_map.items()}
#Reverses emotion mapping such that integers can be mapped into emotions

#Loads the uploaded audio with librosa and renders an in-app audio player
@st.cache_data
def process_audio(input_file):
    """Return (audio_signal, sample_rate) for the uploaded file.

    Also embeds a Streamlit audio player for the clip; librosa resamples
    to its default rate on load.
    """
    st.audio(input_file)
    waveform, rate = librosa.load(input_file)
    return waveform, rate

#Creates an animated line chart of the audio's magnitude spectrum over time
def display_spectrum_animation(audio_signal, sample_rate):
    """Animate the STFT magnitude spectrum frame-by-frame and show it as a GIF.

    Parameters:
        audio_signal: 1-D waveform array (as returned by librosa.load).
        sample_rate: sample rate of audio_signal in Hz.

    Side effects: writes "spectrum_animation.gif" to the working directory
    and displays it in the Streamlit app.
    """
    S = np.abs(librosa.stft(audio_signal))  # (freq_bins, time_frames) magnitudes
    frequencies = librosa.fft_frequencies(sr=sample_rate)
    # Fix the y-limit once so amplitudes are comparable across frames;
    # the original let each frame autoscale, which made the axis jump.
    y_max = S.max() if S.size else 1.0

    fig, ax = plt.subplots()

    # Closure captures S/frequencies/ax, so no fargs plumbing is needed.
    def update_spectrum(num):
        ax.clear()
        ax.plot(frequencies, S[:, num])
        ax.set_ylim(0, y_max)
        ax.set_xlabel("Frequency (Hz)")
        ax.set_ylabel("Amplitude")

    ani = animation.FuncAnimation(fig, update_spectrum, frames=S.shape[1], blit=False)
    # "pillow" is available wherever matplotlib is (Pillow is a matplotlib
    # dependency); "imagemagick" requires a separately installed binary.
    ani.save("spectrum_animation.gif", writer="pillow")
    st.image("spectrum_animation.gif")
    plt.close(fig)  # don't leak the figure across Streamlit reruns


@st.cache_data
def display_frequency(audio_signal, sample_rate):
    """Render the waveform (amplitude over time) and show it in the app.

    Uses an explicit figure instead of the implicit plt.gcf(), which could
    pick up stale global state from other plotting calls; the original also
    bound waveshow's return to an unused variable.
    """
    fig, ax = plt.subplots()
    librosa.display.waveshow(audio_signal, sr = sample_rate, ax = ax)
    st.pyplot(fig)

#Creates and displays an animated spectrogram using librosa
@st.cache_data
def display_mel_spectogram(audio_signal, sample_rate):
    """Animate a log-frequency spectrogram growing left-to-right as a GIF.

    NOTE(review): despite the name, this plots a log-axis STFT spectrogram
    (amplitude_to_db of librosa.stft), not a mel spectrogram
    (librosa.feature.melspectrogram) — confirm which was intended. The name
    is kept because create_selections() calls it.

    Side effects: writes "spectrogram_animation.gif" and displays it.
    """
    fig, ax = plt.subplots()
    audio_time = audio_signal.shape[0] / sample_rate  # clip duration in seconds
    D = librosa.amplitude_to_db(np.abs(librosa.stft(audio_signal)), ref = np.max)

    # STFT columns per second of audio; each frame reveals this much
    # extra context beyond the frame index.
    amt_to_add = int(D.shape[-1] / audio_time)

    def update_spectrogram(num):
        ax.clear()
        # Slicing past the last column is safe: numpy clamps the upper bound.
        librosa.display.specshow(D[:, :num + amt_to_add], sr = sample_rate,
                                 x_axis = "time", y_axis = "log", ax = ax)

    # The original also drew a full specshow here and bound it to an unused
    # variable; frame 0 immediately cleared it, so it is dropped.
    ani = animation.FuncAnimation(fig, update_spectrogram,
                                  frames = np.arange(1, D.shape[1]), blit = False)
    # "pillow" ships with matplotlib's Pillow dependency; "imagemagick"
    # needs an external binary that may not be installed.
    ani.save("spectrogram_animation.gif", writer = "pillow")
    st.image("spectrogram_animation.gif")
    plt.close(fig)  # don't leak the figure across Streamlit reruns

#Creates the interface allowing users to select which plot they want displayed
def create_selections(audio_signal, sample_rate):
    """Show a horizontal radio picker and invoke the chosen chart function."""
    # Map each chart title directly to its plotting function; dict order
    # (insertion order) determines the radio option order.
    charts = {
        "Spectrum": display_spectrum_animation,
        "Mel-Spectogram": display_mel_spectogram,
    }
    choice = st.radio(
        label = "",
        options = list(charts),
        horizontal = True
    )
    charts[choice](audio_signal, sample_rate)

#Helper function to force the length of a given frequency array into a specific length
#The model's expected length (66,150 samples) is now the default parameter value
@st.cache_data
def standardize_waveform_length(waveform, audio_length=66150):
    """Force *waveform* to exactly *audio_length* samples.

    Longer inputs are truncated; shorter ones are zero-padded at the end.
    The default 66,150 matches the model's training input size (presumably
    3 s at librosa's default 22,050 Hz — confirm against preprocessing).

    Parameters:
        waveform: 1-D numpy array of audio samples.
        audio_length: target sample count (default 66150, the model's input size).

    Returns: a 1-D array of exactly audio_length samples.
    """
    if len(waveform) > audio_length:
        return waveform[:audio_length]
    # This branch guarantees the pad amount is >= 0, so the original
    # max(0, ...) guard was redundant.
    return np.pad(waveform, (0, audio_length - len(waveform)), "constant")

#Takes in a given audio signal and returns its mel-frequency cepstral coefficients
@st.cache_data
def preprocess_audio_for_prediction(audio_signal, sample_rate):
    """Convert audio into the flat MFCC feature vector the classifier expects."""
    fixed_length_signal = standardize_waveform_length(waveform = audio_signal)
    coefficients = librosa.feature.mfcc(y = fixed_length_signal, sr = sample_rate, n_mels = 128)
    # Flatten (n_mfcc, n_frames) to 1-D for the dense model input.
    return coefficients.reshape(-1)

#Loads the model given in model_path and returns a Keras Sequential model
@st.cache_resource
def load_model(model_path):
    """Load and cache the pickled emotion classifier.

    ``st.cache_resource`` (not ``cache_data``) is Streamlit's documented
    choice for ML models: the object is shared across reruns rather than
    copied/serialized on every cache hit.

    NOTE(review): pickle.load executes arbitrary code from the file — only
    load model files from a trusted source.
    """
    # Context manager closes the file handle; the original open(...) leaked it.
    with open(model_path, "rb") as f:
        return pickle.load(f)

#Uses the model to predict the speaker's emotion in the given audio clip
@st.cache_data
def get_emotion_prediction(mfcc):
    """Classify one MFCC feature vector and return the emotion name."""
    classifier = load_model(model_path)
    # mfcc[None] prepends a batch axis -> shape (1, n_features).
    scores = classifier.predict(mfcc[None])
    # Highest-scoring class index, mapped back to its emotion label.
    return reversed_emotion_map[np.argmax(scores)]

#Combines all model functions and displays the model output as a subheader
@st.cache_data
def display_prediction(audio_signal, sample_rate):
    """Run the full feature-extraction + prediction pipeline and show the result."""
    features = preprocess_audio_for_prediction(audio_signal, sample_rate)
    emotion = get_emotion_prediction(features)
    st.subheader("Predicted Emotion: " + emotion, divider = True)

#Defines the entire process of inputting audio, displaying the model's predictions, and displaying graphs
def run(input_file):
    """Full pipeline for one upload: load audio, show prediction, show charts."""
    signal, rate = process_audio(input_file)
    display_prediction(signal, rate)
    create_selections(signal, rate)

#Creates an input area to upload the file
def main():
    """App entry point: render the uploader and run the pipeline once a .wav arrives."""
    st.header("Upload your file here")
    uploaded = st.file_uploader("", type = "wav")
    if uploaded is not None:
        run(uploaded)

if __name__ == "__main__":
    main()