# dataset_tools.py
import os
import pickle
import re

import cv2
import ffmpeg
import librosa as lr
import numpy as np
from pydub import AudioSegment
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

def convert_au_to_wav(data_dir, genres):
    """ Converts all .au audio files to .wav """
    for genre in genres:
        current_genre_path = os.path.join(data_dir, genre)
        for root, dirs, files in os.walk(current_genre_path):
            for file in files:
                # skip anything that is not an .au file so reruns are safe
                if not file.endswith('.au'):
                    continue
                base_name = os.path.splitext(file)[0]
                (
                    ffmpeg
                    .input(os.path.join(root, file))
                    .output(f"{os.path.join(root, base_name)}.wav")
                    .run()
                )
                os.remove(os.path.join(root, file))

def slice_audio(audio_file, slice_length=3, num_slices=10):
    """ Slices a single audio file into slice_length second chunks """
    slice_ms = slice_length * 1000  # pydub indexes audio in milliseconds
    audio = AudioSegment.from_wav(audio_file)
    slices = []
    for i in range(num_slices):
        start = i * slice_ms
        slices.append(audio[start:start + slice_ms])
    return slices
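
# For a standard 30 second GTZAN clip, slice_audio returns ten non-overlapping
# 3 second pydub AudioSegments (the filename below is a hypothetical example):
#   slices = slice_audio("blues.00000.wav")
#   assert len(slices) == 10 and len(slices[0]) == 3000  # pydub lengths are in ms
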
def slice_dataset(data_dir, genres):
    """ Iterates through the entire dataset and slices every audio file into 3 second chunks """
    for genre in genres:
        current_index = 0
        current_genre_path = os.path.join(data_dir, genre)
        for root, dirs, files in os.walk(current_genre_path):
            for file in files:
                song_genre = file.split('.')[0]  # GTZAN names files like blues.00000.wav
                audio_file = os.path.join(root, file)
                audio_slices = slice_audio(audio_file)
                audio_path = os.path.join(root, song_genre)
                for audio in audio_slices:
                    audio.export(f"{audio_path}.{current_index}.wav", format="wav")
                    current_index += 1

def clean_up_files(data_dir, genres):
    """ Iterates through the dataset and removes the original whole audio files """
    # only the original GTZAN files contain a five-digit track id, e.g. blues.00000.wav
    regexp = re.compile(r"\d{5}")
    for genre in genres:
        current_genre_path = os.path.join(data_dir, genre)
        for root, dirs, files in os.walk(current_genre_path):
            for file in files:
                if regexp.search(file):
                    os.remove(os.path.join(root, file))

def build_training_data(data_dir, genres, width, height):
    """ Builds training data from the audio slices and returns arrays of image data and labels """
    training_data = []
    for genre in genres:
        current_genre_path = os.path.join(data_dir, genre)
        for root, dirs, files in os.walk(current_genre_path):
            for file in files:
                song_genre = file.split('.')[0]
                song_path = os.path.join(root, file)
                audio, sr = lr.load(song_path, sr=22050)
                spectrogram = lr.feature.melspectrogram(y=audio, sr=sr)
                # resize the mel spectrogram so every slice becomes a fixed-size "image"
                img = cv2.resize(spectrogram, (width, height))
                # keep the string label; LabelEncoder converts it to an integer later
                training_data.append([img, song_genre])
    X = [img for img, _ in training_data]
    y = [label for _, label in training_data]
    return X, y

def preprocess_data(X, y, width, height):
    """ Reshapes arrays, one-hot encodes the labels, and splits into train and test sets """
    # cv2.resize(img, (width, height)) produces arrays of shape (height, width)
    X = np.array(X).reshape(-1, height, width, 1)
    y = np.array(y)
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)  # genre strings -> integer ids
    y = to_categorical(y)  # integer ids -> one-hot vectors
    # mel spectrogram values are floats, not 0-255 pixels, so min-max scale to [0, 1]
    X = (X - X.min()) / (X.max() - X.min())
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    return X_train, X_test, y_train, y_test
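
# A minimal example driver, sketching how these helpers appear intended to be chained
# together for a GTZAN-style dataset. The dataset path, genre list, image size, and
# pickle filename below are illustrative assumptions, not part of the original script.
if __name__ == "__main__":
    DATA_DIR = "./genres"  # assumed layout: one subdirectory of .au files per genre
    GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
              'jazz', 'metal', 'pop', 'reggae', 'rock']
    WIDTH, HEIGHT = 128, 128  # assumed spectrogram image size

    convert_au_to_wav(DATA_DIR, GENRES)  # .au clips -> .wav clips
    slice_dataset(DATA_DIR, GENRES)      # whole clips -> 3 second slices
    clean_up_files(DATA_DIR, GENRES)     # remove the original whole clips
    X, y = build_training_data(DATA_DIR, GENRES, WIDTH, HEIGHT)
    X_train, X_test, y_train, y_test = preprocess_data(X, y, WIDTH, HEIGHT)

    # pickle is imported above, presumably to persist the processed arrays;
    # the output filename here is an assumption
    with open("processed_data.pkl", "wb") as f:
        pickle.dump((X_train, X_test, y_train, y_test), f)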