Rowan-Classes/6th-Semester-Spring-2024/DSP/Labs/FinalProject/statistical_based/audnoise.m
2024-04-25 18:38:09 -04:00

177 lines
5.7 KiB
Matlab

function audnoise(ns_file,outfile)
%
% Implements the audible-noise suppression algorithm [1].
%
% Usage: audnoise(noisyFile, outputFile)
%
% infile - noisy speech file in .wav format
% outputFile - enhanced output file in .wav format
%
% It runs 2 iterations, but one could change the number of iterations by
% modifying accordingly the variable iter_num on line 33.
%
% Example call: audnoise('sp04_babble_sn10.wav','out_aud.wav');
%
% References:
% [1] Tsoukalas, D. E., Mourjopoulos, J. N., and Kokkinakis, G. (1997). Speech
% enhancement based on audible noise suppression. IEEE Trans. on Speech and
% Audio Processing, 5(6), 497-514.
%
% Authors: Yi Hu and Philipos C. Loizou
%
% Copyright (c) 2006 by Philipos C. Loizou
% $Revision: 0.0 $ $Date: 10/09/2006 $
%-------------------------------------------------------------------------
if nargin<2
fprintf('Usage: audnoise(noisyfile.wav,outFile.wav) \n\n');
return;
end
iter_num=2; % number of iterations
NF_SABSENT= 6;
%this is the number of speech-absent frames to estimate the initial
%noise power spectrum
[nsdata, Fs, bits]= wavread( ns_file); %nsdata is a column vector
aa=0.98;
mu=0.98;
eta=0.15;
nwind= floor( 20* Fs/ 1000); %this corresponds to 20ms window
if rem( nwind, 2)~= 0 nwind= nwind+ 1; end %made window length even
noverlap= nwind/ 2;
w= hamming( nwind);
rowindex= ( 1: nwind)';
%we assume the first NF_SABSENT frames are speech absent, we use them to estimate the noise power spectrum
noisedata= nsdata( 1: nwind* NF_SABSENT); noise_colindex= 1+ ( 0: NF_SABSENT- 1)* nwind;
noisematrixdata = zeros( nwind, NF_SABSENT);
noisematrixdata( :)= noisedata( ...
rowindex( :, ones(1, NF_SABSENT))+ noise_colindex( ones( nwind, 1), :)- 1);
noisematrixdata= noisematrixdata.* w( :, ones( 1, NF_SABSENT)) ; %WINDOWING NOISE DATA
noise_ps= mean( (abs( fft( noisematrixdata))).^ 2, 2); %NOTE!!!! it is a column vector
% ----- estimate noise in CBs ------------------
%
noise_b=zeros(nwind/2+1,1);
[CB_FREQ_INDICES]=find_CB_FREQ_INDICES(Fs,nwind,16,nwind/2);
for i = 1:length(CB_FREQ_INDICES)
noise_b(CB_FREQ_INDICES{i})=ones(size(CB_FREQ_INDICES{i},2),1)*mean(noise_ps(CB_FREQ_INDICES{i}));
end
noise_b1=[noise_b; fliplr(noise_b(2:nwind/2))];
nslide= nwind- noverlap;
x= nsdata;
nx= length( x); ncol= fix(( nx- noverlap)/ nslide);
colindex = 1 + (0: (ncol- 1))* nslide;
if nx< (nwind + colindex(ncol) - 1)
x(nx+ 1: nwind+ colindex(ncol) - 1) = ...
rand( nwind+ colindex( ncol)- 1- nx, 1)* (2^ (-15)); % zero-padding
end
es_old= zeros( noverlap, 1);
%es_old is actually the second half of the previous enhanced speech frame,
%it is used for overlap-add
for k= 1: ncol
y= x( colindex( k): colindex( k)+ nwind- 1);
y= y.* w; %WINDOWING NOISY SPEECH DATA
y_spec= fft( y); y_specmag= abs( y_spec); y_specang= angle( y_spec);
%they are the frequency spectrum, spectrum magnitude and spectrum phase, respectively
y_ps= y_specmag.^ 2; %power spectrum of noisy speech
y_ps1=y_ps(1:nwind/2+1);
% ====start of vad ===
gammak=min(y_ps./noise_ps,40); % post SNR
if k==1
ksi=aa+(1-aa)*max(gammak-1,0);
else
ksi=aa*Xk_prev./noise_ps + (1-aa)*max(gammak-1,0); % a priori SNR
end
log_sigma_k= gammak.* ksi./ (1+ ksi)- log(1+ ksi);
vad_decision= sum( log_sigma_k)/ nwind;
if (vad_decision < eta)
% noise only frame found
noise_ps= mu* noise_ps+ (1- mu)* y_ps;
end
for i = 1:length(CB_FREQ_INDICES)
noise_b(CB_FREQ_INDICES{i})=...
ones(size(CB_FREQ_INDICES{i},2),1)*mean(noise_ps(CB_FREQ_INDICES{i}));
end
% ===end of vad===
x_cons1=max(y_ps-noise_ps,0.001);
% conservative estimate of x from power spectral subtraction
x_cons = x_cons1(1:nwind/2+1);
% --- Estimate masking thresholds iteratively (as per page 505) ----
%
Tk0=mask(x_cons,nwind,Fs,16);
Xp=y_ps1;
for j=1:iter_num
ab = noise_b+(noise_b.^2)./Tk0; % Eq. 41
Xp=(Xp.^2)./(ab+Xp); % Eq. 40
Tk0=mask(Xp,nwind,Fs,16);
end
% --- Estimate alpha ------
%
alpha = (noise_b+Tk0).*(noise_b./Tk0);
% eq. 26 for Threshold (T) method with ni(b)=1
% ---- Apply suppression rule --------------
%
H0 = (Xp./(alpha+Xp));
H=[H0(1:nwind/2+1); flipud(H0(2:nwind/2))];
x_hat = H.*y_spec;
Xk_prev= abs( x_hat).^ 2;
es_tmp=real(ifft(x_hat));
% ---- Overlap and add ---------------
es_data( colindex( k): colindex( k)+ nwind- 1)= [es_tmp( 1: noverlap)+ es_old;...
es_tmp( noverlap+ 1: nwind)];
%overlap-add
es_old= es_tmp( nwind- noverlap+ 1: nwind);
end
wavwrite( es_data, Fs, bits, outfile);
%------------------------------------------------------
function [CB_FREQ_INDICES]=find_CB_FREQ_INDICES(Fs,dft_length,nbits,frame_overlap)
% This function is from Matlab STSA Toolbox for Audio Signal Noise Reduction
% Copyright (C) 2001 Patrick J. Wolfe
freq_val = (0:Fs/dft_length:Fs/2)';
freq=freq_val;
crit_band_ends = [0;100;200;300;400;510;630;770;920;1080;1270;1480;1720;2000;2320;2700;3150;3700;4400;5300;6400;7700;9500;12000;15500;Inf];
imax = max(find(crit_band_ends < freq(end)));
num_bins = length(freq);
LIN_TO_BARK = zeros(imax,num_bins);
i = 1;
for j = 1:num_bins
while ~((freq(j) >= crit_band_ends(i)) & (freq(j) < crit_band_ends(i+1))),i = i+1;end
LIN_TO_BARK(i,j) = 1;
end
% Calculation of critical band frequency indices--i.e., which bins are in which critical band for i = 1:imax
for i=1:imax,
CB_FREQ_INDICES{i} = find(LIN_TO_BARK(i,:));
end