Rowan-Classes/6th-Semester-Spring-2024/DSP/Labs/FinalProject/statistical_based/wiener_as.m
2024-04-25 18:38:09 -04:00

127 lines
3.8 KiB
Matlab

function wiener_as(filename,outfile)
%
% Implements the Wiener filtering algorithm based on a priori SNR estimation [1].
%
% Usage: wiener_as(noisyFile, outputFile)
%
% infile - noisy speech file in .wav format
% outputFile - enhanced output file in .wav format
%
% Example call: wiener_as('sp04_babble_sn10.wav','out_wien_as.wav');
%
% References:
% [1] Scalart, P. and Filho, J. (1996). Speech enhancement based on a priori
% signal to noise estimation. Proc. IEEE Int. Conf. Acoust. , Speech, Signal
% Processing, 629-632.
%
% Authors: Yi Hu and Philipos C. Loizou
%
% Copyright (c) 2006 by Philipos C. Loizou
% $Revision: 0.0 $ $Date: 10/09/2006 $
%-------------------------------------------------------------------------
if nargin<2
fprintf('Usage: wiener_as(noisyfile.wav,outFile.wav) \n\n');
return;
end
[noisy_speech, fs]= audioread( filename);
noisy_speech= noisy_speech;
% column vector noisy_speech
% set parameter values
mu= 0.98; % smoothing factor in noise spectrum update
a_dd= 0.98; % smoothing factor in priori update
eta= 0.15; % VAD threshold
frame_dur= 20; % frame duration
L= frame_dur* fs/ 1000; % L is frame length (160 for 8k sampling rate)
hamming_win= hamming( L); % hamming window
U= ( hamming_win'* hamming_win)/ L; % normalization factor
% first 120 ms is noise only
len_120ms= fs/ 1000* 120;
% first_120ms= noisy_speech( 1: len_120ms).* ...
% (hann( len_120ms, 'periodic'))';
first_120ms= noisy_speech( 1: len_120ms);
% =============now use Welch's method to estimate power spectrum with
% Hamming window and 50% overlap
nsubframes= floor( len_120ms/ (L/ 2))- 1; % 50% overlap
noise_ps= zeros( L, 1);
n_start= 1;
for j= 1: nsubframes
noise= first_120ms( n_start: n_start+ L- 1);
noise= noise.* hamming_win;
noise_fft= fft( noise, L);
noise_ps= noise_ps+ ( abs( noise_fft).^ 2)/ (L* U);
n_start= n_start+ L/ 2;
end
noise_ps= noise_ps/ nsubframes;
%==============
% number of noisy speech frames
len1= L/ 2; % with 50% overlap
nframes= floor( length( noisy_speech)/ len1)- 1;
n_start= 1;
for j= 1: nframes
noisy= noisy_speech( n_start: n_start+ L- 1);
noisy= noisy.* hamming_win;
noisy_fft= fft( noisy, L);
noisy_ps= ( abs( noisy_fft).^ 2)/ (L* U);
% ============ voice activity detection
if (j== 1) % initialize posteri
posteri= noisy_ps./ noise_ps;
posteri_prime= posteri- 1;
posteri_prime( find( posteri_prime< 0))= 0;
priori= a_dd+ (1-a_dd)* posteri_prime;
else
posteri= noisy_ps./ noise_ps;
posteri_prime= posteri- 1;
posteri_prime( find( posteri_prime< 0))= 0;
priori= a_dd* (G_prev.^ 2).* posteri_prev+ ...
(1-a_dd)* posteri_prime;
end
log_sigma_k= posteri.* priori./ (1+ priori)- log(1+ priori);
vad_decision(j)= sum( log_sigma_k)/ L;
if (vad_decision(j)< eta)
% noise only frame found
noise_ps= mu* noise_ps+ (1- mu)* noisy_ps;
vad( n_start: n_start+ L- 1)= 0;
else
vad( n_start: n_start+ L- 1)= 1;
end
% ===end of vad===
G= sqrt( priori./ (1+ priori)); % gain function
enhanced= ifft( noisy_fft.* G, L);
if (j== 1)
enhanced_speech( n_start: n_start+ L/2- 1)= ...
enhanced( 1: L/2);
else
enhanced_speech( n_start: n_start+ L/2- 1)= ...
overlap+ enhanced( 1: L/2);
end
overlap= enhanced( L/ 2+ 1: L);
n_start= n_start+ L/ 2;
G_prev= G;
posteri_prev= posteri;
end
enhanced_speech( n_start: n_start+ L/2- 1)= overlap;
audiowrite(outfile,enhanced_speech,fs,'BitsPerSample',16);