import json
import math
import librosa
import os
DATASET_PATH = '/content/drive/MyDrive/ADReSS/ADReSS2020/ADReSS-IS2020-Train-data/train/Full_wave_enhanced_audio'
JSON_PATH = '/content/drive/MyDrive/ADReSS/ADReSS2020'
SAMPLES_TO_CONSIDER = 22050 #1 sec worth of sound
SAMPLE_RATE = 22050
DURATION = 30 #measured in seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION

def save_mfcc(dataset_path, json_path, n_mfcc=13, hop_length=512, n_fft=2048, num_segments=10):
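    """Extract MFCCs from each audio file under dataset_path, split into
    segments, and save them together with their class labels to a JSON file
    under json_path.

    :param dataset_path: root folder containing one sub-folder per class
    :param json_path: folder where the output JSON file is written
    :param n_mfcc: number of MFCC coefficients to extract per frame
    :param hop_length: hop length (in samples) used for the STFT
    :param n_fft: FFT window size (in samples)
    :param num_segments: number of segments each track is split into
    """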
    # name of the sub-folder that will hold the output JSON file
    folder_name = "json_files"

    # create the folder if it does not already exist
    if not os.path.exists(os.path.join(json_path, folder_name)):
        os.mkdir(os.path.join(json_path, folder_name))
    # dictionary to store the extracted data
    data = {
        "mappings": [],
        "labels": [],
        "MFCCs": [],
        "files": []
    }
    num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)  # round up, e.g. 1.2 -> 2
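    # with the defaults above: 22050 Hz * 30 s = 661500 samples per track,
    # 661500 / 10 segments = 66150 samples per segment,
    # and ceil(66150 / 512) = 130 expected MFCC vectors per segment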
    # loop through all the sub-directories (one per class)
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

        # ensure that we're not at the root level
        if dirpath != dataset_path:

            # save the semantic label, i.e. the name of the class sub-folder (e.g. the Alzheimer's group)
            semantic_label = os.path.basename(dirpath)
            data["mappings"].append(semantic_label)
            print("\nProcessing: {}".format(semantic_label))
            # process all audio files for this class
            for f in filenames:

                # load the audio file
                filepath = os.path.join(dirpath, f)
                signal, sr = librosa.load(filepath, sr=SAMPLE_RATE)

                # process segments, extracting MFCCs and storing the data
                for s in range(num_segments):
                    start_sample = num_samples_per_segment * s               # s=0 -> 0
                    finish_sample = start_sample + num_samples_per_segment   # s=0 -> num_samples_per_segment
                    # extract the MFCCs for the current segment
                    mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                                sr=sr,
                                                n_fft=n_fft,
                                                n_mfcc=n_mfcc,
                                                hop_length=hop_length)
                    mfcc = mfcc.T
                    # store the MFCCs for the segment only if it has the expected length
                    if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                        data["MFCCs"].append(mfcc.tolist())
                        data["labels"].append(i - 1)
                        data["files"].append(filepath)
                        print("{}, segment: {}".format(filepath, s + 1))
    # write the collected data to a JSON file inside the output folder
    # (the file name "data.json" is an arbitrary choice)
    with open(os.path.join(json_path, folder_name, "data.json"), "w") as fp:
        json.dump(data, fp, indent=4)
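
# Minimal sketch of how the saved JSON could be loaded back for training.
# The helper name load_data, the use of numpy, and the "data.json" file name
# are illustrative assumptions, not part of the original pipeline.
import numpy as np

def load_data(json_file):
    """Load MFCCs and labels from the JSON file written by save_mfcc."""
    with open(json_file, "r") as fp:
        data = json.load(fp)

    # convert the nested lists back into numpy arrays
    X = np.array(data["MFCCs"])
    y = np.array(data["labels"])
    return X, y
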
if __name__ == "__main__":
    save_mfcc(DATASET_PATH, JSON_PATH, num_segments=10)