experimentations/stretchinger.py

79 lines
2.9 KiB
Python

#!/usr/bin/env python3
# Copyright (C) 2021 harrysentonbury
# GNU General Public License v3.0
# Credit to Zulko/pianoputer (GitHub) for this awesome technique.
from audio2numpy import open_audio
import numpy as np
from scipy.io import wavfile
import sounddevice as sd
class ThyStretcher():
    """
    Stereo or mono audio stretcher (phase-vocoder style overlap-add).

    snd_array - 1d (mono) or 2d (samples x channels) numpy array, e.g. int16
    window_size - int, size of each fft window, default=2**13
    hop - int, window offset, default=2**11
    factor - int or float, shrinking or stretching factor, default=0.5
             (the output holds about len(snd_array) / factor samples, so
             values below 1 make the sound longer)

    Attributes set by stretching():
    snd_array - the input as a 2d array (mono input is duplicated)
    hanning_window - the analysis/synthesis window that was used
    phase_l, phase_r - final accumulated phase per fft bin, in [0, 2*pi)
    result - the normalised output as floats, before the int16 cast
    """

    def __init__(self, snd_array, window_size=2**13, hop=2**11, factor=0.5):
        self.snd_array = snd_array
        self.window_size = window_size
        self.hop = hop
        self.factor = factor

    def stretching(self):
        """
        Stretch the sound and return it as a two-column int16 array.

        The returned array has int(n_samples / factor + window_size) rows.
        Only the first two channels of the input are used; a mono input is
        copied into both channels. If the input is silent or shorter than
        window_size + hop, the output is all zeros.

        Raises ValueError if window_size, hop or factor is not positive.
        """
        if self.window_size <= 0 or self.hop <= 0 or self.factor <= 0:
            raise ValueError("window_size, hop and factor must all be positive")

        # Bring the input to shape (n_samples, >=2) so both channels can be
        # processed together along axis 0.
        snd = np.asarray(self.snd_array)
        if snd.ndim == 1:
            snd = snd[:, np.newaxis]
        if snd.shape[1] == 1:
            snd = np.repeat(snd, 2, axis=1)
        self.snd_array = snd
        stereo = snd[:, :2]
        n_samples = stereo.shape[0]

        window = np.hanning(self.window_size)
        window_col = window[:, np.newaxis]
        phase = np.zeros((self.window_size, 2))
        result = np.zeros((int(n_samples / self.factor + self.window_size), 2))
        two_pi = 2 * np.pi

        # Read positions advance by hop * factor while write positions
        # advance by hop; that mismatch is what changes the duration.
        stop = n_samples - (self.window_size + self.hop)
        for start in np.arange(0, stop, self.hop * self.factor):
            start = int(start)
            frame_a = stereo[start: start + self.window_size]
            frame_b = stereo[start + self.hop: start + self.window_size + self.hop]

            # Frequency domain
            spec_a = np.fft.fft(window_col * frame_a, axis=0)
            spec_b = np.fft.fft(window_col * frame_b, axis=0)

            # Rephase all frequencies. angle(b * conj(a)) equals angle(b / a)
            # but does not divide by zero on empty bins. The modulus must be
            # the full 2*pi: "% 2*np.pi" would compute (x % 2) * pi.
            phase = (phase + np.angle(spec_b * np.conj(spec_a))) % two_pi
            rephased = np.fft.ifft(np.abs(spec_b) * np.exp(1j * phase), axis=0)

            out_start = int(start / self.factor)
            result[out_start: out_start + self.window_size] += window_col * rephased.real

        self.hanning_window = window
        self.phase_l = phase[:, 0]
        self.phase_r = phase[:, 1]

        # normalize (16bit), leaving 4 bits of headroom. Skipped when the
        # peak is zero (silent or too-short input) to avoid 0/0 -> NaN.
        peak = result.max()
        if peak > 0:
            result = (2 ** (16 - 4)) * result / peak
        self.result = result
        return result.astype('int16')
# Demo: load one clip, stretch it and play the result.
path_to_file = "audio/go.wav"

# Anything that is not an .mp3 is read as WAV. The two readers hand back
# their (samples, rate) pair in opposite order.
if not path_to_file.endswith(".mp3"):
    sample_rate, sound = wavfile.read(path_to_file)
else:
    sound, sample_rate = open_audio(path_to_file)
sound = np.float64(sound)

stretched = ThyStretcher(
    sound,
    window_size=2**13,
    hop=2**11,
    factor=0.02,
).stretching()

sd.play(stretched, sample_rate)
sd.wait()