Skip to content
Snippets Groups Projects
Commit 814491b7 authored by Vincent Wall's avatar Vincent Wall
Browse files

smoothed spectra, option to append recordings, and pause button

parent 87683a96
No related branches found
No related tags found
No related merge requests found
......@@ -2,3 +2,9 @@
.idea/
three_locations_white_noise/
lndw2022_sweep_1s/
three_locations_sweep_1s/
__pycache__/
......@@ -20,31 +20,31 @@ Before running the script, make sure to start QjackCtl.
@copyright 2020 Robotics and Biology Lab, TU Berlin
@licence: BSD Licence
"""
import numpy
import random
import librosa
import os
import scipy
import scipy.io.wavfile
from matplotlib import pyplot
from matplotlib.widgets import Button
from jacktools.jacksignal import JackSignal
from glob import glob
# ==================
# USER SETTINGS
# ==================
BASE_DIR = "."
SOUND_NAME = "white_noise" # sound to use
CLASS_LABELS = ["top", "middle", "base"] # classes to train
SAMPLES_PER_CLASS = 5
MODEL_NAME = "three_locations_white_noise"
SHUFFLE_RECORDING_ORDER = True
SOUND_NAME = "sweep" # sound to use
CLASS_LABELS = ["tip", "middle", "base", "back", "none"] # classes to train
SAMPLES_PER_CLASS = 20
MODEL_NAME = "lndw2022_sweep_1s"
SHUFFLE_RECORDING_ORDER = False
APPEND_TO_EXISTING_FILES = True
# ==================
CHANNELS = 4
CHANNELS = 1
SR = 48000
PICKLE_PROTOCOL = 2
# Example sounds
RECORDING_DELAY_SILENCE = numpy.zeros(int(SR*0.15), dtype='float32') # the microphone has about .15 seconds delay in recording the sound
def setup_experiment():
    """Build the ordered list of labels to record and set the start index.

    Populates the module-level ``label_list`` (one entry per recording still
    to make) and ``current_idx`` (index of the next recording).  When
    ``APPEND_TO_EXISTING_FILES`` is set, existing recordings in ``DATA_DIR``
    are counted so new files continue after the highest existing id.
    """
    global label_list
    global current_idx
    label_list = CLASS_LABELS * SAMPLES_PER_CLASS
    if SHUFFLE_RECORDING_ORDER:
        random.shuffle(label_list)
    current_idx = 0
    if APPEND_TO_EXISTING_FILES:
        # Recordings are named "<id>_<label>.wav".  Use os.path.basename
        # instead of splitting on "/" so this also works on Windows, and
        # default=0 avoids a ValueError when DATA_DIR has no .wav files yet
        # (the original max([...]) crashed on a fresh directory).
        existing_ids = [int(os.path.basename(path).split("_")[0])
                        for path in glob(os.path.join(DATA_DIR, "*.wav"))]
        max_id = max(existing_ids, default=0)
        # Pad with placeholder labels so list indices line up with file ids.
        label_list = [""] * max_id + label_list
        current_idx = max_id
def setup_jack(sound_name):
global J
......
......@@ -24,17 +24,21 @@ import pickle
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.model_selection import train_test_split
from matplotlib import pyplot
from A_record import MODEL_NAME
# ==================
# USER SETTINGS
# ==================
BASE_DIR = "."
MODEL_NAME = "three_locations_white_noise"
SENSORMODEL_FILENAME = "sensor_model.pkl"
TEST_SIZE = 0 # percentage of samples left out of training and used for reporting test score
SHOW_PLOTS = True
# ==================
SR = 48000
PICKLE_PROTOCOL = 2
PICKLE_PROTOCOL = pickle.HIGHEST_PROTOCOL
def get_num_and_label(filename):
......@@ -69,6 +73,7 @@ def load_sounds(path):
sound = librosa.load(os.path.join(path, fn), sr=SR)[0]
sounds.append(sound)
labels.append(label)
print(f"Loaded **{len(sounds)}** sounds with \nlabels: {sorted(set(labels))}")
return sounds, labels
......@@ -83,21 +88,53 @@ def sound_to_spectrum(sound):
return series
def sound_to_spectrum_stft(sound, n_fft=4096, in_dB=False):
    """Convert a sound to a (optionally smoothed/summed) STFT magnitude spectrum.

    Parameters
    ----------
    sound : 1-d float array of audio samples (assumed sampled at SR).
    n_fft : int, FFT window size for the STFT.
    in_dB : bool, if True convert the spectrum to decibels (ref = max).

    Returns
    -------
    pandas.Series of per-frequency magnitude, indexed by frequency in Hz.
    """
    # Pass n_fft by keyword: in librosa >= 0.10 the extra arguments of
    # stft() and fft_frequencies() are keyword-only, so the previous
    # positional calls raise TypeError there.
    spectrogram = numpy.abs(librosa.stft(sound, n_fft=n_fft))
    # Collapse the time axis: total magnitude per frequency bin.
    spectrum = spectrogram.sum(axis=1)
    if in_dB:
        # convert to decibel scale
        spectrum = librosa.amplitude_to_db(spectrum, ref=numpy.max)
    freqs = librosa.fft_frequencies(sr=SR, n_fft=n_fft)
    index = pandas.Index(freqs)
    series = pandas.Series(spectrum, index=index)
    return series
def save_sensor_model(path, clf, filename):
    """Pickle the trained classifier ``clf`` to ``path``/``filename``."""
    target = os.path.join(path, filename)
    with open(target, 'wb') as model_file:
        pickle.dump(clf, model_file, protocol=PICKLE_PROTOCOL)
def plot_spectra(spectra, labels):
    """Plot every spectrum colour-coded by its class label, with a legend."""
    from matplotlib import pyplot
    from matplotlib.lines import Line2D

    fig, ax = pyplot.subplots(1)
    # One colour per class, assigned in sorted label order from the
    # default matplotlib colour cycle.
    cycle_colors = pyplot.rcParams['axes.prop_cycle'].by_key()['color']
    label_colors = dict(zip(sorted(set(labels)), cycle_colors))
    for spectrum, label in zip(spectra, labels):
        ax.plot(spectrum, c=label_colors[label])
    # Proxy artists: one thick line per class for the legend entries.
    proxies = [Line2D([0], [0], color=col, lw=4) for col in label_colors.values()]
    ax.legend(proxies, list(label_colors.keys()))
    fig.show()
def main():
print("Running for model '{}'".format(MODEL_NAME))
global DATA_DIR
DATA_DIR = os.path.join(BASE_DIR, MODEL_NAME)
sounds, labels = load_sounds(DATA_DIR)
spectra = [sound_to_spectrum(sound) for sound in sounds]
# spectra = [sound_to_spectrum(sound) for sound in sounds]
spectra = [sound_to_spectrum_stft(sound) for sound in sounds]
classes = list(set(labels))
if SHOW_PLOTS:
plot_spectra(spectra, labels)
if TEST_SIZE > 0:
X_train, X_test, y_train, y_test = train_test_split(spectra, labels, test_size=TEST_SIZE)
else:
......@@ -113,9 +150,12 @@ def main():
test_score = clf.score(X_test, y_test)
print("Test score: {:.2f}".format(test_score))
sensor_model_filename = "sensor_model.pkl"
save_sensor_model(DATA_DIR, clf, sensor_model_filename)
print("\nSaved model to '{}'".format(os.path.join(DATA_DIR, sensor_model_filename)))
save_sensor_model(DATA_DIR, clf, SENSORMODEL_FILENAME)
print("\nSaved model to '{}'".format(os.path.join(DATA_DIR, SENSORMODEL_FILENAME)))
if SHOW_PLOTS:
pyplot.pause(0.1)
pyplot.show()
if __name__ == "__main__":
......
......@@ -27,19 +27,23 @@ import pickle
from sklearn.neighbors import KNeighborsClassifier
from jacktools.jacksignal import JackSignal
import matplotlib.pyplot as plt
from matplotlib.widgets import Button
from A_record import MODEL_NAME
from B_train import SENSORMODEL_FILENAME
from B_train import sound_to_spectrum, sound_to_spectrum_stft
# ==================
# USER SETTINGS
# ==================
BASE_DIR = "."
MODEL_NAME = "three_locations_white_noise"
SENSORMODEL_FILENAME = "sensor_model.pkl"
CONTINUOUSLY = True # chose between continuous sensing or manually triggered
# ==================
CHANNELS = 1
SR = 48000
PICKLE_PROTOCOL = 2
is_paused = False
plt.ion()
# plt.xkcd()
......@@ -47,6 +51,7 @@ plt.ion()
if sys.version_info.major == 2:
input = raw_input
class LiveAcousticSensor(object):
def __init__(self):
# load sound from file (starts with "0_")
......@@ -81,6 +86,7 @@ class LiveAcousticSensor(object):
self.clf = pickle.load(f)
print(self.clf.classes_)
def setup_window(self):
f = plt.figure(1)
f.clear()
......@@ -88,38 +94,36 @@ class LiveAcousticSensor(object):
ax1 = f.add_subplot(2, 2, 1)
ax1.set_title("Recorded sound (waveform)", size=20)
ax1.set_xlabel("Time [samples]")
ax1.set_ylim([-1, 1])
ax2 = f.add_subplot(2, 2, 2)
ax2.set_title("Amplitude spectrum", size=20)
ax2.set_xlabel("Frequency [Hz]")
self.wavelines, = ax1.plot(self.Ains[0])
self.spectrumlines, = ax2.plot(self.sound_to_spectrum(self.Ains[0]))
ax2.set_ylim([0, 350])
self.spectrumlines, = ax2.plot(sound_to_spectrum_stft(self.Ains[0]))
ax2.set_ylim([0, 250])
ax3 = f.add_subplot(2, 1, 2)
ax3.text(0.0, 0.3, "Sensing result:", dict(size=40))
self.predictiontext = ax3.text(0.5, 0.25, "", dict(size=70))
ax3.text(0.0, 0.8, "Sensing result:", dict(size=40))
self.predictiontext = ax3.text(0.25, 0.25, "", dict(size=70))
ax3.set_xticklabels([])
ax3.set_yticklabels([])
# ax3.set_title("Contact location")
ax3.axis('off')
ax_pause = plt.axes([0.91, 0.025, 0.05, 0.075])
self.b_pause = Button(ax_pause, '[P]ause')
self.b_pause.on_clicked(toggle_pause)
cid = f.canvas.mpl_connect('key_press_event', on_key)
f.show()
plt.draw()
plt.pause(0.00001)
def sound_to_spectrum(self, sound):
    """Return the amplitude spectrum of ``sound`` as a pandas Series.

    The series is indexed by frequency in Hz, derived from the module
    sample rate SR (bin spacing 1/SR).
    """
    amplitudes = numpy.abs(numpy.fft.rfft(sound))
    bin_freqs = numpy.fft.rfftfreq(len(sound), 1.0 / SR)
    return pandas.Series(amplitudes, index=pandas.Index(bin_freqs))
def predict(self):
for i in range(CHANNELS):
spectrum = self.sound_to_spectrum(self.Ains[i])
# spectrum = self.sound_to_spectrum(self.Ains[i])
spectrum = sound_to_spectrum_stft(self.Ains[i])
prediction = self.clf.predict([spectrum])
self.wavelines.set_ydata(self.Ains[0].reshape(-1))
self.spectrumlines.set_ydata(spectrum)
......@@ -132,9 +136,10 @@ class LiveAcousticSensor(object):
def run(self):
if CONTINUOUSLY:
while True:
self.J.process()
self.J.wait()
self.predict()
if not is_paused:
self.J.process()
self.J.wait()
self.predict()
plt.pause(1)
else:
key = input("Press <Enter> to sense! ('q' to abort)")
......@@ -145,9 +150,22 @@ class LiveAcousticSensor(object):
key = input("Press <Enter> to sense! ('q' to abort)")
def toggle_pause(event):
    """Invert the module-level ``is_paused`` flag.

    Callback for both the Pause button and the 'p' key; the matplotlib
    ``event`` argument is accepted but ignored.
    """
    global is_paused
    is_paused = not is_paused
def on_key(event):
    """Keyboard shortcuts for the live window: 'p' toggles pause, 'q' quits."""
    key = event.key
    if key == "q":
        sys.exit()
    if key == "p":
        toggle_pause(event)
def main():
    """Entry point: resolve the data directory and start the live sensor."""
    global DATA_DIR, predictor
    DATA_DIR = os.path.join(BASE_DIR, MODEL_NAME)
    predictor = LiveAcousticSensor()
    predictor.run()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment