티스토리 뷰
In [1]:
import os
from os.path import isdir, join
from pathlib import Path
import pandas as pd
# Math
import numpy as np
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
import librosa
from sklearn.decomposition import PCA
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
import librosa.display
from matplotlib.pyplot import imshow
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import pandas as pd
from PIL import Image
%matplotlib inline
In [2]:
def ishow(data):
    """Show a 2-D array as an image after min-max rescaling to 0..255.

    Parameters
    ----------
    data : array-like
        Numeric 2-D values. The input is copied to float32 and never
        modified, so integer arrays are accepted as well.

    Notes
    -----
    The minimum maps to 0 and the maximum to 255. A constant array has no
    range to stretch and is rendered as all zeros instead of dividing by
    zero.
    """
    values = np.asarray(data, dtype=np.float32)
    tmin = np.amin(values)
    tmax = np.amax(values)
    span = tmax - tmin
    if span > 0:
        # Scale by the full value range so the result spans exactly 0..255.
        scaled = (values - tmin) * (255.0 / span)
    else:
        scaled = np.zeros_like(values)
    im = Image.fromarray(scaled)
    imshow(im)
In [3]:
# Root folder of the training set and the clip used throughout the notebook.
# NOTE(review): this is an absolute, machine-specific path - adjust it to
# wherever the dataset lives before running.
train_audio_path = 'G:\\datalab\\train'
filename = '\\right\\00b01445_nohash_0.wav'

# wavfile.read returns the sampling rate followed by the sample array.
sample_rate, samples = wavfile.read(f'{train_audio_path}{filename}')
spectrogram 값과 이미지화
In [4]:
# Spectrogram of the clip using scipy's default window settings.
frequencies, times, spectrogram = signal.spectrogram(samples, fs=sample_rate)
print(spectrogram)

# Render the transposed spectrogram as an image.
im = Image.fromarray(spectrogram.T)
imshow(im)
Out[4]:
log spectrogram 값과 이미지화
In [6]:
def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    """Compute a log-scaled spectrogram with a Hann window.

    Parameters
    ----------
    audio : array-like
        1-D audio samples.
    sample_rate : int or float
        Sampling rate of ``audio`` in Hz.
    window_size : float, optional
        Analysis window length in milliseconds.
    step_size : float, optional
        Hop between consecutive windows in milliseconds.
    eps : float, optional
        Small constant added before the logarithm to avoid ``log(0)``.

    Returns
    -------
    freqs : ndarray
        Frequency of each bin.
    times : ndarray
        Centre time of each frame.
    log_spec : ndarray of float32
        Natural log of the spectrogram, transposed so that rows are time
        frames and columns are frequency bins.

    Raises
    ------
    ValueError
        If the step is not positive or is longer than the window.
    """
    nperseg = int(round(window_size * sample_rate / 1e3))
    step = int(round(step_size * sample_rate / 1e3))
    if not 0 < step <= nperseg:
        raise ValueError(
            'step_size must be positive and no longer than window_size')
    # scipy takes the overlap between windows, not the hop, so convert.
    noverlap = nperseg - step
    freqs, times, spec = signal.spectrogram(audio,
                                            fs=sample_rate,
                                            window='hann',
                                            nperseg=nperseg,
                                            noverlap=noverlap,
                                            detrend=False)
    return freqs, times, np.log(spec.T.astype(np.float32) + eps)
In [7]:
# Log-spectrogram of the clip; log_specgram returns it with one row per
# time frame and one column per frequency bin.
freqs, times, spectrogram = log_specgram(samples, sample_rate)
ishow(spectrogram)

fig = plt.figure(figsize=(14, 8))

# Top panel: waveform against time in seconds (sample index / sample rate).
ax1 = fig.add_subplot(211)
ax1.plot(np.arange(len(samples)) / sample_rate, samples)

# Bottom panel: spectrogram with frequency on the vertical axis; the extent
# maps the image onto the time and frequency values returned above.
ax2 = fig.add_subplot(212)
ax2.imshow(spectrogram.T, aspect='auto', origin='lower',
           extent=[times.min(), times.max(), freqs.min(), freqs.max()])
Out[7]:
In [15]:
# Same spectrogram image without the physical axis extent (pixel indices).
plt.imshow(spectrogram.T, aspect='auto', origin='lower')
Out[15]:
melspectrogram 값과 이미지화
In [8]:
# librosa takes the audio as keyword argument `y` and requires floating-point
# samples, while wavfile.read returns integer samples for integer-PCM files.
# Cast to float32 without rescaling: the views below are min-max / peak
# normalised, so the absolute amplitude scale does not matter.
S = librosa.feature.melspectrogram(y=samples.astype(np.float32),
                                   sr=sample_rate, n_mels=128)
ishow(S.T)

# Convert to log scale (dB). We'll use the peak power (max) as reference.
log_S = librosa.power_to_db(S, ref=np.max)
ishow(log_S.T)

plt.figure(figsize=(12, 4))
librosa.display.specshow(log_S, sr=sample_rate, x_axis='time', y_axis='mel')
plt.title('Mel power spectrogram ')
plt.colorbar(format='%+02.0f dB')
plt.tight_layout()
MFCC 값 이미지화
In [9]:
# MFCCs derived from the log-mel spectrogram computed earlier (log_S).
mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=20)

# Second-order delta of the MFCCs; this, not the raw MFCC matrix, is what
# the figure below shows, so the title says so.
delta2_mfcc = librosa.feature.delta(mfcc, order=2)
print(delta2_mfcc.shape)

plt.figure(figsize=(12, 4))
librosa.display.specshow(delta2_mfcc)
plt.ylabel('MFCC coeffs')
plt.xlabel('Time')
plt.title('MFCC delta-delta')
plt.colorbar()
plt.tight_layout()
In [10]:
# librosa.load returns the audio followed by its sampling rate.
# NOTE(review): no `sr=` is passed, so librosa resamples to its own default
# rate, which may differ from the rate reported by wavfile.read - confirm
# this is intended.
audio, sr = librosa.load(str(train_audio_path) + filename)

mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
mfccs_delta2 = librosa.feature.delta(mfccs, order=2)
print(mfccs_delta2.shape)

# Reuse the notebook's image helper instead of repeating its body inline.
ishow(mfccs_delta2)
Out[10]:
In [11]:
# librosa.load returns the audio followed by its sampling rate.
# NOTE(review): no `sr=` is passed, so librosa resamples to its own default
# rate, which may differ from the rate reported by wavfile.read - confirm
# this is intended.
audio, sr = librosa.load(str(train_audio_path) + filename)

mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
print(mfccs.shape)

# Reuse the notebook's image helper instead of repeating its body inline.
ishow(mfccs)
Out[11]:
In [13]:
# Surface rows follow y and columns follow x, so the transposed spectrogram
# (frequency x time) pairs with y=freqs and x=times. Supplying the
# coordinates puts the axes in physical units rather than array indices.
data = [go.Surface(x=times, y=freqs, z=spectrogram.T)]

# An axis `range` is a [min, max] pair, not the full coordinate array.
layout = go.Layout(
    title='Spectrogram of "right" in 3d',
    scene=dict(
        yaxis=dict(title='Frequencies',
                   range=[float(freqs.min()), float(freqs.max())]),
        xaxis=dict(title='Time',
                   range=[float(times.min()), float(times.max())]),
        zaxis=dict(title='Log amplitude'),
    ),
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)
In [17]:
freqs, times, spectrogram_cut = log_specgram(samples, sample_rate)

fig = plt.figure(figsize=(14, 8))

# Top panel: raw waveform against sample index.
ax1 = fig.add_subplot(211)
ax1.set_title('Raw wave of ' + filename)
ax1.set_ylabel('Amplitude')
ax1.plot(samples)

# Bottom panel: log-spectrogram. The extent is taken from `times` and
# `freqs`, so the axes are labelled in seconds and Hz to match.
ax2 = fig.add_subplot(212)
ax2.set_title('Spectrogram of ' + filename)
ax2.set_ylabel('Frequency [Hz]')
ax2.set_xlabel('Time [s]')
ax2.imshow(spectrogram_cut.T, aspect='auto', origin='lower',
           extent=[times.min(), times.max(), freqs.min(), freqs.max()])
Out[17]:
FFT 필터링
In [18]:
def custom_fft(y, fs):
    """Return the one-sided amplitude spectrum of a real signal.

    Parameters
    ----------
    y : ndarray
        1-D signal samples.
    fs : int or float
        Sampling rate of ``y`` in Hz.

    Returns
    -------
    xf : ndarray
        Frequency of each returned bin, ``k * fs / N`` for
        ``k = 0 .. N//2 - 1``.
    vals : ndarray
        ``2/N`` times the magnitude of the corresponding FFT bin.
    """
    N = y.shape[0]
    yf = fft(y)
    half = N // 2
    # Bin k of an N-point FFT sits at k * fs / N; an inclusive linspace up to
    # fs/2 would stretch the spacing and misplace every peak.
    xf = np.arange(half) * (fs / N)
    # The FFT of a real signal is symmetric, so keep only the first half.
    # The FFT is complex, so take the magnitude (abs) of each bin.
    vals = 2.0 / N * np.abs(yf[:half])
    return xf, vals
In [19]:
# Reload the clip and resample it to the target rate.
new_sample_rate = 8000
sample_rate, samples = wavfile.read(f'{train_audio_path}{filename}')
# The output length keeps the clip duration: n_out = n_in * new_rate / old_rate.
resampled = signal.resample(
    samples,
    num=int(new_sample_rate / sample_rate * samples.shape[0]),
)
In [20]:
# Audio player for the clip as read from disk.
ipd.Audio(data=samples, rate=sample_rate)
Out[20]:
In [21]:
# Audio player for the resampled clip at the new rate.
ipd.Audio(data=resampled, rate=new_sample_rate)
Out[21]:
In [22]:
# Reload the clip and resample it to the target rate.
new_sample_rate = 16000
sample_rate, samples = wavfile.read(f'{train_audio_path}{filename}')
# The output length keeps the clip duration: n_out = n_in * new_rate / old_rate.
resampled = signal.resample(
    samples,
    num=int(new_sample_rate / sample_rate * samples.shape[0]),
)
In [23]:
# Audio player for the resampled clip at the new rate.
ipd.Audio(data=resampled, rate=new_sample_rate)
Out[23]:
In [24]:
# Amplitude spectrum of the clip as read from disk.
xf, vals = custom_fft(samples, sample_rate)

plt.figure(figsize=(12, 4))
plt.plot(xf, vals)
plt.title(f'FFT of recording sampled with {sample_rate} Hz')
plt.xlabel('Frequency')
plt.grid()
plt.show()
In [25]:
# Amplitude spectrum of the resampled clip, for comparison with the original.
xf, vals = custom_fft(resampled, new_sample_rate)

plt.figure(figsize=(12, 4))
plt.plot(xf, vals)
plt.title(f'FFT of recording sampled with {new_sample_rate} Hz')
plt.xlabel('Frequency')
plt.grid()
plt.show()
공지사항
최근에 올라온 글
최근에 달린 댓글
- Total
- Today
- Yesterday
링크
TAG
- optional
- Rails 설치
- github Actions
- 스위프트
- Ruby on Rails
- m n 관계
- Dynamic link
- 딥링크
- devise
- GitHub Pages
- putty
- go_router
- 옵셔널
- Windows에서 Rails 설치
- Route
- Railsinstaller
- Animation
- github
- Firebase
- ERB
- rails
- flutter
- 애니메이션
- rails m n 관계
- Ruby
- Aptana Studio
- M:N관계
- 루비
- 딥링킹
- 레일즈
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
1 | 2 | 3 | ||||
4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 26 | 27 | 28 | 29 | 30 | 31 |
글 보관함