-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathutils.py
More file actions
130 lines (111 loc) · 4.64 KB
/
utils.py
File metadata and controls
130 lines (111 loc) · 4.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from scipy import signal
import numpy as np
import h5py
import os
from scipy.io.wavfile import write
import matplotlib.pyplot as plt
import soundfile as sf
#Feature extraction
def feature_extraction(x, fs):
    """Split the magnitude spectrogram of a signal into fixed-size segments.

    The signal is transformed with an STFT (40 ms window, 50 % overlap). The
    magnitude is cut into consecutive, non-overlapping segments of
    ``sample_length`` STFT frames each; trailing frames that do not fill a
    whole segment are dropped.

    Args:
        x: 1-D audio signal (the STFT result is unpacked as a 2-D array).
        fs: Sampling rate of ``x`` in Hz.

    Returns:
        A tuple ``(trimmed_phase, features)`` where

        * ``trimmed_phase`` has shape ``(n_freqs, n_segments * sample_length)``
          and holds the STFT phase of the frames that were kept;
        * ``features`` has shape ``(n_segments, n_freqs, sample_length, 1)``
          and holds the STFT magnitude; the last axis is a channel axis.

        ``n_segments`` is 0 when the signal is shorter than one segment.
    """
    frame_length_s = 0.04  # STFT window length in seconds
    frame_length = int(fs * frame_length_s)  # STFT window length in samples
    # Half a window of overlap between consecutive frames (50 %).
    hop_length = frame_length // 2

    _, _, X = signal.stft(x, noverlap=hop_length, fs=fs, nperseg=frame_length)
    number_frequencies, number_time_frames = X.shape
    phaseInfo = np.angle(X)
    X = np.abs(X)

    # Number of STFT frames per segment (0.5 / 0.04 -> 12 frames).
    # NOTE(review): frames advance by half a window, so 12 frames span roughly
    # 0.26 s of audio rather than 0.5 s -- confirm this is intended.
    sample_length_s = 0.5
    sample_length = int(sample_length_s / frame_length_s)

    # Keep only the frames that fill whole segments. Slicing with an explicit
    # end index (instead of ``:-remainder``) stays correct when the remainder
    # is zero, where ``:-0`` would select nothing.
    number_segments = number_time_frames // sample_length
    usable_frames = number_segments * sample_length
    trimmed_X = X[:, :usable_frames]
    trimmed_phaseInfo = phaseInfo[:, :usable_frames]

    # Column-major reshape groups consecutive frames into segments:
    # (freqs, frames) -> (freqs, frames per segment, segments, channel).
    features = trimmed_X.reshape(
        (number_frequencies, sample_length, number_segments, 1), order='F'
    )
    # Reorder to (segments, freqs, frames per segment, channel).
    return trimmed_phaseInfo, features.transpose((2, 0, 1, 3))
def get_features(dir_name):
    """Extract spectrogram features from every ``.wav`` file in a directory.

    Files are processed in sorted filename order so that the row order of the
    returned features (and the order of the phase list) is reproducible;
    ``os.listdir`` alone returns entries in arbitrary order.

    Args:
        dir_name: Path of the directory to scan (not recursive).

    Returns:
        A tuple ``(features, trimmed_phases)``: ``features`` is the per-file
        feature arrays stacked along the first axis, and ``trimmed_phases`` is
        a list with one phase array per file, in processing order.

    Raises:
        ValueError: If the directory contains no ``.wav`` file.
    """
    print("----------Extracting Features--------------")
    features = []
    trimmed_phases = []
    for filename in sorted(os.listdir(dir_name)):
        if not filename.endswith(".wav"):
            continue
        x, fs = sf.read(os.path.join(dir_name, filename))
        phase, feature = feature_extraction(x, fs)
        features.append(feature)
        trimmed_phases.append(phase)
    if not features:
        # np.vstack would also raise ValueError here, but with a message that
        # says nothing about the directory.
        raise ValueError("no .wav files found in %r" % (dir_name,))
    features = np.vstack(features)
    return features, trimmed_phases
def unison_shuffled_copies(a, b):
    """Return copies of ``a`` and ``b`` shuffled with the same permutation.

    Both inputs are permuted along their first axis, so element ``i`` of the
    first result still corresponds to element ``i`` of the second.

    Args:
        a: Array-like; converted with ``np.asarray``.
        b: Array-like with the same length as ``a``.

    Returns:
        A tuple ``(a_shuffled, b_shuffled)`` of NumPy arrays.

    Raises:
        ValueError: If ``a`` and ``b`` differ in length.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if len(a) != len(b):
        raise ValueError(
            "a and b must have the same length, got %d and %d" % (len(a), len(b))
        )
    p = np.random.permutation(len(a))
    return a[p], b[p]
def split(matrix, target, test_proportion):
    """Split features and targets into train and test parts without shuffling.

    The first ``int(n_rows * test_proportion)`` rows become the test set and
    the remaining rows the training set. Only the first axis is sliced, so
    inputs of any dimensionality (including 1-D targets) are accepted.

    This simple slicing is used instead of scikit-learn's ``train_test_split``
    because the latter caused memory errors on large data.

    Args:
        matrix: Feature array; must expose ``shape`` and support slicing.
        target: Target array aligned with ``matrix`` along the first axis.
        test_proportion: Fraction of rows assigned to the test set.

    Returns:
        A tuple ``(X_train, X_test, Y_train, Y_test)``.
    """
    n_test = int(matrix.shape[0] * test_proportion)
    X_train = matrix[n_test:]
    X_test = matrix[:n_test]
    Y_train = target[n_test:]
    Y_test = target[:n_test]
    print("----------Done Splitting---------------")
    return X_train, X_test, Y_train, Y_test
def save_features(features_filename, X_train, X_test, y_train, y_test):
    """Write the four train/test arrays to an HDF5 file and return them.

    The file is opened in ``'w'`` mode and receives one dataset per array,
    named ``X_train``, ``X_test``, ``y_train`` and ``y_test``.

    Args:
        features_filename: Path of the HDF5 file to write.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``, unchanged.
    """
    print("----------Saving Features--------------")
    named_arrays = (
        ('X_train', X_train),
        ('X_test', X_test),
        ('y_train', y_train),
        ('y_test', y_test),
    )
    with h5py.File(features_filename, 'w') as h5_file:
        for dataset_name, array in named_arrays:
            h5_file.create_dataset(dataset_name, data=array)
    return X_train, X_test, y_train, y_test
def read_features(features_filename):
    """Load the train/test arrays from an HDF5 file.

    Reads the datasets ``X_train``, ``X_test``, ``y_train`` and ``y_test``
    fully into memory.

    Args:
        features_filename: Path of the HDF5 file to read.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` of in-memory arrays.

    Raises:
        KeyError: If one of the four datasets is missing from the file.
    """
    with h5py.File(features_filename, 'r') as f:
        # ``dataset[()]`` reads the whole dataset; the older ``.value``
        # attribute was removed in h5py 3.0.
        X_train = f['X_train'][()]
        X_test = f['X_test'][()]
        y_train = f['y_train'][()]
        y_test = f['y_test'][()]
    return X_train, X_test, y_train, y_test
def reconstruct(y, fs, model):
    """Run a model on a signal's spectrogram segments and write the result as audio.

    The signal is converted to magnitude segments with ``feature_extraction``,
    passed through ``model.predict``, reassembled into one spectrogram and
    inverted with an inverse STFT. Two files are written to the current
    working directory:

    * ``output_without_phase.wav`` -- inverse STFT of the predicted
      magnitudes alone;
    * ``output_with_phase.wav`` -- inverse STFT of the predicted magnitudes
      combined with the phase of the input signal.

    Args:
        y: Input audio signal.
        fs: Sampling rate of ``y`` in Hz.
        model: Object with a ``predict`` method. Assumed to return an array
            shaped like its input, (segments, freqs, frames, 1) -- TODO
            confirm against the model actually used.
    """
    phaseInfo, feat = feature_extraction(y, fs)
    yhat = model.predict(feat)

    # Undo the segmentation: (segments, freqs, frames, 1) ->
    # (freqs, frames, segments, 1) -> (freqs, all frames), column-major so
    # that segments are laid out one after another in time.
    yrec = yhat.transpose((1, 2, 0, 3))
    yrec = yrec.reshape((yrec.shape[0], -1), order='F')

    # Must match the window length used by feature_extraction (40 ms).
    # Without an explicit nperseg, istft infers it from the number of
    # frequency bins assuming an even window, which is off by one whenever
    # int(fs * 0.04) is odd.
    frame_length = int(fs * 0.04)

    _, xrec = signal.istft(yrec, fs, nperseg=frame_length)
    sf.write("output_without_phase.wav", xrec, fs, 'PCM_16')
    print('Output without phase info was saved.')

    # Re-attach the phase of the input signal to the predicted magnitudes.
    yrec = yrec * np.exp(1j * phaseInfo)
    _, xrec = signal.istft(yrec, fs, nperseg=frame_length)
    sf.write("output_with_phase.wav", xrec, fs, 'PCM_16')
    print('Output with phase info was saved.')
def plotSpectrogram(y, fs):
    """Display the log-magnitude spectrogram of a signal.

    Uses an STFT with a 40 ms window and 50 % overlap. The horizontal axis is
    the STFT frame index and the vertical axis the frequency in Hz.

    Args:
        y: Audio signal.
        fs: Sampling rate of ``y`` in Hz.
    """
    frame_length_s = 0.04  # STFT window length in seconds
    frame_length = int(fs * frame_length_s)  # STFT window length in samples
    hop_length = frame_length // 2  # 50 % overlap
    # Use the frequency vector returned by the STFT itself; rebuilding it
    # with linspace(0, fs / 2, bins) is only exact for even window lengths.
    freq_scale, _, X = signal.stft(
        y, noverlap=hop_length, fs=fs, nperseg=frame_length
    )
    # NOTE(review): the x axis is the STFT frame index although the label
    # below reads 'Time (sample)'.
    timeframe_scale = np.arange(0, X.shape[1])

    # Plot the magnitude only; the small offset avoids log(0).
    W = np.abs(X)
    plt.figure()
    plt.pcolormesh(timeframe_scale, freq_scale, np.log(W + 0.00001))
    plt.xlabel('Time (sample)')
    plt.ylabel('Frequency (Hz)')
    plt.title('Entire file Spectrogram (log scale)')
    plt.show()