177 lines
5.7 KiB
Matlab
177 lines
5.7 KiB
Matlab
function audnoise(ns_file,outfile)
|
|
|
|
%
|
|
% Implements the audible-noise suppression algorithm [1].
|
|
%
|
|
% Usage: audnoise(noisyFile, outputFile)
|
|
%
|
|
% infile - noisy speech file in .wav format
|
|
% outputFile - enhanced output file in .wav format
|
|
%
|
|
% It runs 2 iterations, but one could change the number of iterations by
|
|
% modifying accordingly the variable iter_num on line 33.
|
|
%
|
|
% Example call: audnoise('sp04_babble_sn10.wav','out_aud.wav');
|
|
%
|
|
% References:
|
|
% [1] Tsoukalas, D. E., Mourjopoulos, J. N., and Kokkinakis, G. (1997). Speech
|
|
% enhancement based on audible noise suppression. IEEE Trans. on Speech and
|
|
% Audio Processing, 5(6), 497-514.
|
|
%
|
|
% Authors: Yi Hu and Philipos C. Loizou
|
|
%
|
|
% Copyright (c) 2006 by Philipos C. Loizou
|
|
% $Revision: 0.0 $ $Date: 10/09/2006 $
|
|
%-------------------------------------------------------------------------
|
|
|
|
if nargin<2
|
|
fprintf('Usage: audnoise(noisyfile.wav,outFile.wav) \n\n');
|
|
return;
|
|
end
|
|
|
|
|
|
iter_num=2; % number of iterations
|
|
NF_SABSENT= 6;
|
|
%this is the number of speech-absent frames to estimate the initial
|
|
%noise power spectrum
|
|
|
|
[nsdata, Fs, bits]= wavread( ns_file); %nsdata is a column vector
|
|
|
|
aa=0.98;
|
|
mu=0.98;
|
|
eta=0.15;
|
|
|
|
nwind= floor( 20* Fs/ 1000); %this corresponds to 20ms window
|
|
if rem( nwind, 2)~= 0 nwind= nwind+ 1; end %made window length even
|
|
noverlap= nwind/ 2;
|
|
w= hamming( nwind);
|
|
rowindex= ( 1: nwind)';
|
|
|
|
%we assume the first NF_SABSENT frames are speech absent, we use them to estimate the noise power spectrum
|
|
noisedata= nsdata( 1: nwind* NF_SABSENT); noise_colindex= 1+ ( 0: NF_SABSENT- 1)* nwind;
|
|
noisematrixdata = zeros( nwind, NF_SABSENT);
|
|
noisematrixdata( :)= noisedata( ...
|
|
rowindex( :, ones(1, NF_SABSENT))+ noise_colindex( ones( nwind, 1), :)- 1);
|
|
noisematrixdata= noisematrixdata.* w( :, ones( 1, NF_SABSENT)) ; %WINDOWING NOISE DATA
|
|
noise_ps= mean( (abs( fft( noisematrixdata))).^ 2, 2); %NOTE!!!! it is a column vector
|
|
|
|
% ----- estimate noise in CBs ------------------
|
|
%
|
|
noise_b=zeros(nwind/2+1,1);
|
|
[CB_FREQ_INDICES]=find_CB_FREQ_INDICES(Fs,nwind,16,nwind/2);
|
|
|
|
for i = 1:length(CB_FREQ_INDICES)
|
|
noise_b(CB_FREQ_INDICES{i})=ones(size(CB_FREQ_INDICES{i},2),1)*mean(noise_ps(CB_FREQ_INDICES{i}));
|
|
end
|
|
noise_b1=[noise_b; fliplr(noise_b(2:nwind/2))];
|
|
|
|
nslide= nwind- noverlap;
|
|
|
|
x= nsdata;
|
|
nx= length( x); ncol= fix(( nx- noverlap)/ nslide);
|
|
colindex = 1 + (0: (ncol- 1))* nslide;
|
|
if nx< (nwind + colindex(ncol) - 1)
|
|
x(nx+ 1: nwind+ colindex(ncol) - 1) = ...
|
|
rand( nwind+ colindex( ncol)- 1- nx, 1)* (2^ (-15)); % zero-padding
|
|
end
|
|
|
|
es_old= zeros( noverlap, 1);
|
|
%es_old is actually the second half of the previous enhanced speech frame,
|
|
%it is used for overlap-add
|
|
|
|
for k= 1: ncol
|
|
|
|
y= x( colindex( k): colindex( k)+ nwind- 1);
|
|
y= y.* w; %WINDOWING NOISY SPEECH DATA
|
|
|
|
y_spec= fft( y); y_specmag= abs( y_spec); y_specang= angle( y_spec);
|
|
%they are the frequency spectrum, spectrum magnitude and spectrum phase, respectively
|
|
|
|
y_ps= y_specmag.^ 2; %power spectrum of noisy speech
|
|
y_ps1=y_ps(1:nwind/2+1);
|
|
|
|
% ====start of vad ===
|
|
gammak=min(y_ps./noise_ps,40); % post SNR
|
|
if k==1
|
|
ksi=aa+(1-aa)*max(gammak-1,0);
|
|
else
|
|
ksi=aa*Xk_prev./noise_ps + (1-aa)*max(gammak-1,0); % a priori SNR
|
|
end
|
|
|
|
log_sigma_k= gammak.* ksi./ (1+ ksi)- log(1+ ksi);
|
|
vad_decision= sum( log_sigma_k)/ nwind;
|
|
if (vad_decision < eta)
|
|
% noise only frame found
|
|
noise_ps= mu* noise_ps+ (1- mu)* y_ps;
|
|
end
|
|
|
|
|
|
for i = 1:length(CB_FREQ_INDICES)
|
|
noise_b(CB_FREQ_INDICES{i})=...
|
|
ones(size(CB_FREQ_INDICES{i},2),1)*mean(noise_ps(CB_FREQ_INDICES{i}));
|
|
end
|
|
|
|
% ===end of vad===
|
|
|
|
x_cons1=max(y_ps-noise_ps,0.001);
|
|
% conservative estimate of x from power spectral subtraction
|
|
x_cons = x_cons1(1:nwind/2+1);
|
|
|
|
% --- Estimate masking thresholds iteratively (as per page 505) ----
|
|
%
|
|
Tk0=mask(x_cons,nwind,Fs,16);
|
|
Xp=y_ps1;
|
|
for j=1:iter_num
|
|
ab = noise_b+(noise_b.^2)./Tk0; % Eq. 41
|
|
Xp=(Xp.^2)./(ab+Xp); % Eq. 40
|
|
Tk0=mask(Xp,nwind,Fs,16);
|
|
end
|
|
|
|
% --- Estimate alpha ------
|
|
%
|
|
alpha = (noise_b+Tk0).*(noise_b./Tk0);
|
|
% eq. 26 for Threshold (T) method with ni(b)=1
|
|
|
|
% ---- Apply suppression rule --------------
|
|
%
|
|
H0 = (Xp./(alpha+Xp));
|
|
H=[H0(1:nwind/2+1); flipud(H0(2:nwind/2))];
|
|
|
|
x_hat = H.*y_spec;
|
|
Xk_prev= abs( x_hat).^ 2;
|
|
|
|
es_tmp=real(ifft(x_hat));
|
|
|
|
% ---- Overlap and add ---------------
|
|
|
|
es_data( colindex( k): colindex( k)+ nwind- 1)= [es_tmp( 1: noverlap)+ es_old;...
|
|
es_tmp( noverlap+ 1: nwind)];
|
|
%overlap-add
|
|
es_old= es_tmp( nwind- noverlap+ 1: nwind);
|
|
end
|
|
|
|
wavwrite( es_data, Fs, bits, outfile);
|
|
|
|
%------------------------------------------------------
|
|
|
|
function [CB_FREQ_INDICES]=find_CB_FREQ_INDICES(Fs,dft_length,nbits,frame_overlap)
|
|
% This function is from Matlab STSA Toolbox for Audio Signal Noise Reduction
|
|
% Copyright (C) 2001 Patrick J. Wolfe
|
|
|
|
freq_val = (0:Fs/dft_length:Fs/2)';
|
|
freq=freq_val;
|
|
crit_band_ends = [0;100;200;300;400;510;630;770;920;1080;1270;1480;1720;2000;2320;2700;3150;3700;4400;5300;6400;7700;9500;12000;15500;Inf];
|
|
imax = max(find(crit_band_ends < freq(end)));
|
|
num_bins = length(freq);
|
|
LIN_TO_BARK = zeros(imax,num_bins);
|
|
i = 1;
|
|
for j = 1:num_bins
|
|
while ~((freq(j) >= crit_band_ends(i)) & (freq(j) < crit_band_ends(i+1))),i = i+1;end
|
|
LIN_TO_BARK(i,j) = 1;
|
|
end
|
|
% Calculation of critical band frequency indices--i.e., which bins are in which critical band for i = 1:imax
|
|
for i=1:imax,
|
|
CB_FREQ_INDICES{i} = find(LIN_TO_BARK(i,:));
|
|
end
|
|
|