TensorFlow でメルスペクトログラム (Mel Spectrogram) を計算する。
tf.signal.stft は TFLite では使えないため、TFLite 向けには別の STFT 実装 (stft_magnitude_tflite) を使う。
import numpy as np
import librosa
import librosa.display
import cv2
import tensorflow as tf
def MinMaxNorm(tensor):
    """Rescale each batch element by its own minimum and a floored maximum.

    The minimum and maximum are reduced over axes 1 and 2, so ``tensor``
    needs at least three axes, with axis 0 treated as the batch axis
    (presumably ``(batch, frames, bins)`` -- confirm against callers).

    The maximum is floored at 2.0 before scaling, so an element whose peak
    is below 2.0 is normalised against 2.0 instead of being stretched to 1.0.

    Args:
        tensor: Floating-point tensor to normalise.

    Returns:
        ``(tensor - min) / (max(max, 2.0) - min + 1e-8)``, same shape as
        ``tensor``.
    """
    # keepdims=True keeps the statistics shaped (batch, 1, 1) so they
    # broadcast per batch element. Without it the rank-1 result is aligned
    # with the LAST axis, which is only correct when the batch size is 1.
    minval = tf.reduce_min(tensor, axis=[1, 2], keepdims=True)
    maxval = tf.reduce_max(tensor, axis=[1, 2], keepdims=True)
    maxval = tf.maximum(2.0, maxval)
    # The 1e-8 term guards against a zero denominator.
    return tf.divide(tensor - minval, maxval - minval + 1e-8)
def ToMelTF0(y, sr, DIM=128, TFLITE=False):
    """Turn a batch of waveforms into normalised log-mel spectrograms.

    Args:
        y: Waveform batch; ``y.shape[1]`` is read as the sample count
            (assumed ``(batch, samples)`` -- confirm against callers).
        sr: Sample rate in Hz, forwarded to the mel weight matrix.
        DIM: Number of mel bins.
        TFLITE: When true, use ``stft_magnitude_tflite`` instead of
            ``tf.signal.stft``. This path reshapes ``y`` to a single-row
            batch first.

    Returns:
        ``MinMaxNorm`` applied to ``log(mel + 1e-6)``; presumably shaped
        ``(batch, frames, DIM)`` -- confirm against callers.
    """
    batch, n_samples = 1, y.shape[1]
    # 1024-sample frames with a 256-sample hop (75% overlap); that is
    # 64 ms per frame when the sample rate is 16 kHz.
    frame_len, hop, n_fft = 1024, 256, 1024

    if TFLITE:
        y = tf.reshape(y, [batch, int(n_samples)])
        magnitudes, stfts = stft_magnitude_tflite(
            y,
            window_length_samples=frame_len,
            hop_length_samples=hop,
            fft_length=n_fft,
        )
    else:
        stfts = tf.signal.stft(
            y,
            frame_length=frame_len,
            frame_step=hop,
            fft_length=n_fft,
            pad_end=False,
        )
        magnitudes = tf.abs(stfts)

    # Project the linear-frequency bins onto the mel scale (80 Hz - 7.6 kHz).
    mel_filter = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=DIM,
        num_spectrogram_bins=stfts.shape[-1],
        sample_rate=sr,
        lower_edge_hertz=80.0,
        upper_edge_hertz=7600.0,
    )
    mel = tf.tensordot(magnitudes, mel_filter, 1)
    # tensordot can lose the static shape, so restore it explicitly.
    mel.set_shape(magnitudes.shape[:-1].concatenate(mel_filter.shape[-1:]))

    # The small offset keeps the log finite for zero-energy bins.
    return MinMaxNorm(tf.math.log(mel + 1e-6))
def ToMelTF(filename, sr=16000):
    """Load an audio file and return its normalised log-mel spectrogram.

    Args:
        filename: Path handed to ``librosa.load``.
        sr: Sample rate requested from ``librosa.load``.

    Returns:
        The first batch element of ``ToMelTF0``'s output, converted with
        ``.numpy()``.
    """
    # duration=100000 caps the load at 100000 seconds of audio.
    samples, rate = librosa.load(filename, sr=sr, duration=100000)
    # Add a leading batch axis of size one before handing off.
    batched = samples[np.newaxis]
    return ToMelTF0(batched, rate).numpy()[0]