VoiceDetection/main.py at main · timkrebs/VoiceDetection · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
from utils.hmm.gmmhmm import GMM
import scipy
import scipy.io.wavfile
import matplotlib.pyplot as plt

from utils.filt.filter import *
from utils.builder.dataset import dataSetBuilder


# 1. Step: Vorverarbeitung
#   1.1 Abtastung
#   1.2 Filterung
#   1.3 Trasnformation
#   1.4 Merkmalsvektor
#   1.5 Cepstrum

def plot_contours(data, means, covs, title):
    """visualize the gaussian components over the data"""
    plt.figure()
    plt.plot(data[:, 0], data[:, 1], 'ko')

    delta = 0.025
    k = means.shape[0]
    x = np.arange(-2.0, 7.0, delta)
    y = np.arange(-2.0, 7.0, delta)
    x_grid, y_grid = np.meshgrid(x, y)
    coordinates = np.array([x_grid.ravel(), y_grid.ravel()]).T

    col = ['green', 'red', 'indigo']
    for i in range(k):
        mean = means[i]
        cov = covs[i]
        z_grid = multivariate_normal(mean, cov).pdf(coordinates).reshape(x_grid.shape)
        plt.contour(x_grid, y_grid, z_grid, colors = col[i])

    plt.title(title)
    plt.tight_layout()


def main():
    # Get the dir of training_data
    trainData = "./train_audio/"
    testData = "./test_audio/"

    # Build trainings dataset
    trainDataSet = dataSetBuilder(trainData)

    # Train GMMHMM Models
    gmm = GMM(n_components = 3, n_iters = 1, tol = 1e-4, seed = 4)
    gmm.fit(trainDataSet)

    plot_contours(trainDataSet, gmm.means, gmm.covs, 'Initial clusters')

    #build test dataset
    testDataSet = dataSetBuilder(testData)

    cnt = 0

    #for label in testDataSet.keys():
    #    feature = testDataSet[label]
    #    scoreList = {}
    #    for model_label in hmmModel.keys():
    #        model = hmmModel[model_label]
    #        score = model.score(feature[0])
    #        scoreList[model_label] = score
    #    predict = max(scoreList, key=scoreList.get)
    #    print("Test on true label ", label, ": predict result label is ", predict)
    #    if predict == label:
    #        cnt+=1
    #print("Final recognition rate is %.2f"%(100.0*cnt/len(testDataSet.keys())), "%")


if __name__ == '__main__':
    main()

    # Read audio file in array
        #fs, sig = scipy.io.wavfile.read("OSR/"+filename)


    # Filter the Signal with Low Pass Filter
        #sig = movingAverage(x=sig, sr=fs, cutoff)

        #x = get_features(signal=sig, sample_rate=fs)


    # get voiced frames
        #energy, vad, voiced = naive_frame_energy_vad(sig, fs, threshold=-20, win_len=0.025, win_hop=0.025)


    # plot results
        #multi_plots(data=[sig, energy, vad, voiced], titles=["Input signal (voiced + silence)", "Short time energy", "Voice activity detection", "Output signal (voiced only)"], fs=fs, plot_rows=4, step=1)

    # save voiced signal
        #scipy.io.wavfile.write("rame_energy_vad"+ filename, fs,  np.array(voiced, dtype=sig.dtype))