import time
from enum import Enum
import numpy as np
import librosa
# Text file opened at start-up; every line is stripped and used as the path
# of one wav file to load.
PATH_LIST = "wav_list.txt"
# Sampling-rate argument handed to librosa.load for every file, so all
# signals are loaded at the same rate.
DEFAULT_FS = 8000
class Feature_Types(Enum):
    """Kinds of audio feature the script can extract from each signal.

    The selected member decides which librosa routine is applied to every
    loaded waveform before the features are compared.
    """

    # Magnitude of the short-time Fourier transform.
    SPECTRUM = 1
    # Output of librosa.feature.spectral_centroid.
    SPECTRUM_CENTROID = 2
    # Output of librosa.feature.mfcc.
    MFCC = 3
# Feature used for the whole run; the feature-extraction step compares this
# value against the Feature_Types members to pick the librosa routine.
feature_type = Feature_Types.MFCC
# Step 1: read the list of wav paths and load every file at a common rate.
print("#1 [Wav files read]")
with open(PATH_LIST) as f:
    # One path per line. Blank lines are skipped so that an empty string is
    # never handed to librosa.load as a path.
    path_list = [line.strip() for line in f if line.strip()]

# Each entry is the (samples, sampling_rate) pair returned by librosa.load.
# `sr` is passed by keyword: it is keyword-only in librosa >= 0.10, and the
# keyword form is also accepted by older releases.
x_and_fs_list = [librosa.load(path, sr=DEFAULT_FS) for path in path_list]

# Echo the 1-based index of every file; the evaluation output refers to
# files by these indices.
print("> | {} : {}".format("Index", "Path"))
for index, path in enumerate(path_list, start=1):
    print("> | {} : {}".format(index, path))
print("")
# Step 2: turn every loaded signal into the feature selected by feature_type.
print("#2 [Feature extraction]")
feature_list = []
for x, fs in x_and_fs_list:
    if feature_type == Feature_Types.SPECTRUM:
        # Magnitude of the complex STFT.
        feature = np.abs(librosa.stft(x))
    elif feature_type == Feature_Types.SPECTRUM_CENTROID:
        # y/sr are keyword-only in librosa >= 0.10; keywords also work on
        # older releases.
        feature = librosa.feature.spectral_centroid(y=x, sr=fs)
    elif feature_type == Feature_Types.MFCC:
        feature = librosa.feature.mfcc(y=x, sr=fs)
    else:
        # Fail loudly instead of hitting an undefined (or stale) `feature`.
        raise ValueError("Unsupported feature type: {}".format(feature_type))
    feature_list.append(feature)
print("")
# Step 3: score every feature against the reference with dynamic time warping.
# perf_counter is monotonic, so the elapsed time is immune to clock changes.
start = time.perf_counter()
print("#3 [Evaluation]")
reference_index = 0
if not feature_list:
    # Without this guard the reference lookup fails with a bare IndexError.
    raise SystemExit("No features to evaluate: the wav list is empty.")
reference_feature = feature_list[reference_index]
print("> Reference : {} ({})".format(reference_index + 1, path_list[reference_index]))

eval_list = []
for target_feature in feature_list:
    # ac is the accumulated-cost matrix; the warping path is not needed.
    ac, _ = librosa.sequence.dtw(reference_feature, target_feature)
    max_cost = ac.max()
    if max_cost > 0:
        # Final accumulated cost normalised by the largest entry of the
        # matrix, flipped so that a higher score means a closer match.
        score = 1 - (ac[-1, -1] / max_cost)
    else:
        # Every cost is zero, i.e. the sequences match exactly; avoid the
        # 0/0 division that would yield nan.
        score = 1.0
    eval_list.append(score)

print("> | {} , {} : {}".format("Reference", "Target", "Score"))
for target_index, score in enumerate(eval_list):
    print("> | {} , {} : {}".format(reference_index + 1, target_index + 1, round(score, 4)))
print("")
end = time.perf_counter()
print("Total elapsed time : {}[sec]".format(round(end - start, 4)))