127 lines
3.8 KiB
Matlab
127 lines
3.8 KiB
Matlab
function wiener_as(filename,outfile)
|
|
|
|
%
|
|
% Implements the Wiener filtering algorithm based on a priori SNR estimation [1].
|
|
%
|
|
% Usage: wiener_as(noisyFile, outputFile)
|
|
%
|
|
% infile - noisy speech file in .wav format
|
|
% outputFile - enhanced output file in .wav format
|
|
|
|
%
|
|
% Example call: wiener_as('sp04_babble_sn10.wav','out_wien_as.wav');
|
|
%
|
|
% References:
|
|
% [1] Scalart, P. and Filho, J. (1996). Speech enhancement based on a priori
|
|
% signal to noise estimation. Proc. IEEE Int. Conf. Acoust. , Speech, Signal
|
|
% Processing, 629-632.
|
|
%
|
|
% Authors: Yi Hu and Philipos C. Loizou
|
|
%
|
|
% Copyright (c) 2006 by Philipos C. Loizou
|
|
% $Revision: 0.0 $ $Date: 10/09/2006 $
|
|
%-------------------------------------------------------------------------
|
|
|
|
if nargin<2
|
|
fprintf('Usage: wiener_as(noisyfile.wav,outFile.wav) \n\n');
|
|
return;
|
|
end
|
|
|
|
|
|
|
|
[noisy_speech, fs]= audioread( filename);
|
|
noisy_speech= noisy_speech;
|
|
% column vector noisy_speech
|
|
|
|
% set parameter values
|
|
mu= 0.98; % smoothing factor in noise spectrum update
|
|
a_dd= 0.98; % smoothing factor in priori update
|
|
eta= 0.15; % VAD threshold
|
|
frame_dur= 20; % frame duration
|
|
L= frame_dur* fs/ 1000; % L is frame length (160 for 8k sampling rate)
|
|
hamming_win= hamming( L); % hamming window
|
|
U= ( hamming_win'* hamming_win)/ L; % normalization factor
|
|
|
|
% first 120 ms is noise only
|
|
len_120ms= fs/ 1000* 120;
|
|
% first_120ms= noisy_speech( 1: len_120ms).* ...
|
|
% (hann( len_120ms, 'periodic'))';
|
|
first_120ms= noisy_speech( 1: len_120ms);
|
|
|
|
% =============now use Welch's method to estimate power spectrum with
|
|
% Hamming window and 50% overlap
|
|
nsubframes= floor( len_120ms/ (L/ 2))- 1; % 50% overlap
|
|
noise_ps= zeros( L, 1);
|
|
n_start= 1;
|
|
for j= 1: nsubframes
|
|
noise= first_120ms( n_start: n_start+ L- 1);
|
|
noise= noise.* hamming_win;
|
|
noise_fft= fft( noise, L);
|
|
noise_ps= noise_ps+ ( abs( noise_fft).^ 2)/ (L* U);
|
|
n_start= n_start+ L/ 2;
|
|
end
|
|
noise_ps= noise_ps/ nsubframes;
|
|
%==============
|
|
|
|
% number of noisy speech frames
|
|
len1= L/ 2; % with 50% overlap
|
|
nframes= floor( length( noisy_speech)/ len1)- 1;
|
|
n_start= 1;
|
|
|
|
for j= 1: nframes
|
|
noisy= noisy_speech( n_start: n_start+ L- 1);
|
|
noisy= noisy.* hamming_win;
|
|
noisy_fft= fft( noisy, L);
|
|
noisy_ps= ( abs( noisy_fft).^ 2)/ (L* U);
|
|
|
|
% ============ voice activity detection
|
|
if (j== 1) % initialize posteri
|
|
posteri= noisy_ps./ noise_ps;
|
|
posteri_prime= posteri- 1;
|
|
posteri_prime( find( posteri_prime< 0))= 0;
|
|
priori= a_dd+ (1-a_dd)* posteri_prime;
|
|
else
|
|
posteri= noisy_ps./ noise_ps;
|
|
posteri_prime= posteri- 1;
|
|
posteri_prime( find( posteri_prime< 0))= 0;
|
|
priori= a_dd* (G_prev.^ 2).* posteri_prev+ ...
|
|
(1-a_dd)* posteri_prime;
|
|
end
|
|
|
|
log_sigma_k= posteri.* priori./ (1+ priori)- log(1+ priori);
|
|
vad_decision(j)= sum( log_sigma_k)/ L;
|
|
if (vad_decision(j)< eta)
|
|
% noise only frame found
|
|
noise_ps= mu* noise_ps+ (1- mu)* noisy_ps;
|
|
vad( n_start: n_start+ L- 1)= 0;
|
|
else
|
|
vad( n_start: n_start+ L- 1)= 1;
|
|
end
|
|
% ===end of vad===
|
|
|
|
G= sqrt( priori./ (1+ priori)); % gain function
|
|
|
|
enhanced= ifft( noisy_fft.* G, L);
|
|
|
|
if (j== 1)
|
|
enhanced_speech( n_start: n_start+ L/2- 1)= ...
|
|
enhanced( 1: L/2);
|
|
else
|
|
enhanced_speech( n_start: n_start+ L/2- 1)= ...
|
|
overlap+ enhanced( 1: L/2);
|
|
end
|
|
|
|
overlap= enhanced( L/ 2+ 1: L);
|
|
n_start= n_start+ L/ 2;
|
|
|
|
G_prev= G;
|
|
posteri_prev= posteri;
|
|
|
|
end
|
|
|
|
enhanced_speech( n_start: n_start+ L/2- 1)= overlap;
|
|
|
|
audiowrite(outfile,enhanced_speech,fs,'BitsPerSample',16);
|
|
|
|
|