In [None]:
import itertools
import os
import subprocess

import IPython
import librosa
import matplotlib.pyplot as plt
import numpy as np
from essentia.standard import MonoWriter
from pylab import plot, show, figure, imshow

%matplotlib inline
# Enlarge the default figure size so spectrogram plots are readable.
plt.rcParams.update({'figure.figsize': (15, 6)})

In [None]:
def invlogamplitude(S):
    """Undo a ``10 * log10`` (decibel) scaling.

    The original note on this helper states that ``librosa.logamplitude``
    applies ``10 * log10``; this function inverts that by returning
    ``10 ** (S / 10)``. Works on plain scalars and, element-wise, on NumPy
    arrays.
    """
    exponent = S / 10.0
    return 10.0 ** exponent

def transform_and_restore(filename, n_mels=128, n_fft=512, hop_length=256, hop_stride=1, hop_fill=False, plot_mels=False, sr=16000, seed=None):
    """Sonify a mel-spectrogram: analyse a file into mel bands, then resynthesise it from noise.

    The audio is loaded at ``sr``, reduced to a mel power spectrogram, mapped
    back onto linear STFT bins through the transposed mel filterbank, and the
    resulting magnitude is imposed on the phase of a white-noise STFT before
    inverting to a waveform.

    Parameters
    ----------
    filename : str
        Path of the audio file, passed to ``librosa.load``.
    n_mels : int
        Number of mel bands.
    n_fft : int
        FFT size used for the mel analysis, the noise STFT and the filterbank.
    hop_length : int
        Hop (in samples) of the noise STFT and of the final inverse STFT.
    hop_stride : int
        Multiplier applied to ``hop_length`` for the mel analysis only
        (analysis hop is ``int(hop_length * hop_stride)``). Also used as the
        repetition count when ``hop_fill`` is true, so it must be an integer
        in that case.
    hop_fill : bool
        If true, repeat every mel frame ``hop_stride`` times so the
        mel-spectrogram regains the frame rate of the noise STFT.
    plot_mels : bool
        If true, show the mel-spectrogram in dB before resynthesis.
    sr : int
        Sample rate the file is loaded at.
    seed : int or None
        Seed for the white-noise excitation. ``None`` (default) draws from
        NumPy's global random state, exactly as before; an integer makes the
        output reproducible.

    Returns
    -------
    recon : np.ndarray
        Resynthesised waveform.
    sr : int
        Sample rate reported by ``librosa.load``.

    Notes
    -----
    With ``hop_fill=False`` and ``hop_stride > 1`` the mel-spectrogram
    generally has fewer frames than the noise STFT, and the final
    element-wise product cannot broadcast (NumPy raises ``ValueError``).
    Use ``hop_fill=True`` whenever ``hop_stride`` is larger than 1.
    """
    # Keyword arguments throughout: recent librosa releases reject these
    # parameters positionally, while older releases accept keywords too.
    y, sr = librosa.load(filename, sr=sr)

    # Mel analysis, optionally at a coarser frame rate than the synthesis hop.
    mel_hop = int(hop_length * hop_stride)
    mels = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=mel_hop)

    if plot_mels:
        # melspectrogram returns a *power* spectrogram by default, so the dB
        # conversion must be power_to_db (10*log10), not amplitude_to_db.
        plt.imshow(librosa.power_to_db(mels), aspect='auto', origin='lower', interpolation='none')
        plt.title("Mel log-spectrogram")
        plt.show()

    if hop_fill:
        print("Mel-spec shape:", mels.shape)
        # Repeat each frame hop_stride times along the time axis.
        mels = np.repeat(mels, hop_stride, axis=1)
        print("Filled Mel-spec shape:", mels.shape)

    # White-noise excitation, one sample per input sample.
    if seed is None:
        excitation = np.random.randn(y.shape[0])
    else:
        excitation = np.random.RandomState(seed).randn(y.shape[0])
    E = librosa.stft(excitation, n_fft=n_fft, hop_length=hop_length)
    # Frame filling can overshoot the STFT length; drop the surplus frames.
    mels = mels[:, :E.shape[1]]

    # Empirical scaling of channels to get ~flat amplitude mapping
    mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
    bin_scaling = 1.0 / np.maximum(0.0005, np.sum(np.dot(mel_basis.T, mel_basis), axis=0))
    recon_stft = bin_scaling[:, np.newaxis] * np.dot(mel_basis.T, mels)
    print(recon_stft.shape)

    # Impose reconstructed magnitude on white noise STFT
    print("Shape E", E.shape)
    print("Shape Mel", mels.shape)
    magnitude = np.abs(E)
    # Floor the divisor so a zero-magnitude bin yields 0 instead of NaN.
    unit_phase = E / np.maximum(magnitude, np.finfo(magnitude.dtype).tiny)
    # recon_stft is a power estimate, hence the square root to get magnitude.
    recon = librosa.istft(unit_phase * np.sqrt(recon_stft), win_length=n_fft, hop_length=hop_length, window='hann')

    return recon, sr

In [None]:
# Folder with all the original audio files.
# NOTE(review): "/path/tracks/" looks like a placeholder — point it at the
# local copy of the tracks before running the cells that read from `folder`.
folder = "/path/tracks/"

# Preview one original file

In [None]:
# Load one track at 16 kHz and render an inline audio player for it.
# os.path.join gives the same path whether or not `folder` ends with a separator.
filename = os.path.join(folder, 'TRABEAE12903CDD8EE.mp3')
orig, sr = librosa.load(filename, sr=16000)
IPython.display.Audio(orig, rate=sr)

# Sonify the mel-spectrogram of that example

In [None]:
# Resynthesise the preview track from a 48-band mel-spectrogram whose frames
# are computed every 10 hops and repeated (hop_fill) to restore the frame rate.
recon, sr = transform_and_restore(
    filename,
    sr=16000,
    n_fft=512,
    n_mels=48,
    hop_length=256,
    hop_stride=10,
    hop_fill=True,
    plot_mels=True,
)
# Last expression of the cell: renders the audio player for the result.
IPython.display.Audio(recon, rate=sr)

# Generate all the audio examples

In [None]:
# Parameter grid: every track is rendered once per combination of these.
mels = [128, 96, 48, 32, 24, 16, 8]  # numbers of mel bands
temp = [1, 2, 3, 4, 5, 10]           # hop_stride factors (temporal resolution)
srs = [12, 16]                       # sample rates in kHz

files = []  # not used in this cell; kept in case other cells reference it
for root, _dirs, names in os.walk(folder):
    for name in names:
        # Match on the extension only, not on '.mp3' anywhere in the name.
        if not name.endswith('.mp3'):
            continue

        # Join with the directory os.walk is currently in, so tracks that live
        # in sub-folders of `folder` resolve to their real path.
        source_path = os.path.join(root, name)
        new_file_directory = os.path.join(root, os.path.splitext(name)[0])
        os.makedirs(new_file_directory, exist_ok=True)

        # `sr_khz` (not `sr`) so the notebook-level `sr` in Hz is not clobbered.
        for n_mel, tmp, sr_khz in itertools.product(mels, temp, srs):
            recon, o_sr = transform_and_restore(source_path, sr=sr_khz*1000, n_mels=n_mel, hop_stride=tmp, hop_fill=True)
            new_filename = '{}/{}k-mel{}-x{}'.format(new_file_directory, sr_khz, n_mel, tmp)
            wav_path = new_filename + '.wav'
            # NOTE(review): `librosa.output` is not available in recent librosa
            # releases (removed in 0.8) — confirm the librosa version pinned
            # for this notebook.
            librosa.output.write_wav(wav_path, recon, o_sr)
            try:
                # Convert wav to flac: 16 kHz, 16-bit, first 30 seconds.
                # An argument list (no shell) keeps paths with spaces or shell
                # metacharacters intact; -y overwrites output from earlier
                # runs; check=True surfaces ffmpeg failures instead of
                # silently losing the result.
                subprocess.run(
                    ['ffmpeg', '-y', '-i', wav_path, '-vn', '-ar', '16000',
                     '-sample_fmt', 's16', '-ss', '0', '-t', '30',
                     new_filename + '.flac'],
                    check=True,
                )
            finally:
                # The wav is only an intermediate; remove it even on failure.
                os.remove(wav_path)