133 lines
5.1 KiB
Matlab
133 lines
5.1 KiB
Matlab
function [snr_mean, segsnr_mean] = comp_SNR(cleanFile, enhdFile)
% COMP_SNR  Overall and mean segmental signal-to-noise ratio of two files.
%
%   Segmental Signal-to-Noise Ratio Objective Speech Quality Measure
%
%   This function implements the segmental signal-to-noise ratio
%   as defined in [1, p. 45] (see Equation 2.12).
%
%   Usage:  [snr_mean, segsnr_mean] = comp_SNR(cleanFile, enhdFile)
%
%         cleanFile    - name of the clean reference file (.wav format)
%         enhdFile     - name of the enhanced/processed file (.wav format)
%         snr_mean     - overall SNR (dB) computed over the whole signal
%         segsnr_mean  - mean over all frames of the segmental SNR (dB)
%
%   Both files must have the same sampling rate and bit depth, otherwise
%   an error is raised. If the files differ in length, both are truncated
%   to the shorter one before the measures are computed.
%
%   The per-frame segmental SNR is clamped to the range between 35 dB and
%   -10 dB (see suggestions in [2]) before it is averaged.
%
%   When called with fewer than two input arguments, a usage line is
%   printed and both outputs are returned empty.
%
%   Example call:  [SNRovl, SNRseg] = comp_SNR('sp04.wav', 'enhanced.wav')
%
%   References:
%
%     [1] S. R. Quackenbush, T. P. Barnwell, and M. A. Clements,
%         Objective Measures of Speech Quality. Prentice Hall
%         Advanced Reference Series, Englewood Cliffs, NJ, 1988,
%         ISBN: 0-13-629056-6.
%
%     [2] P. E. Papamichalis, Practical Approaches to Speech
%         Coding, Prentice-Hall, Englewood Cliffs, NJ, 1987.
%         ISBN: 0-13-689019-9. (see pages 179-181).
%
%   Authors: Bryan L. Pellom and John H. L. Hansen (July 1998)
%   Modified by: Philipos C. Loizou (Oct 2006)
%
%   Copyright (c) 2006 by Philipos C. Loizou
%   $Revision: 0.0 $  $Date: 10/09/2006 $
%-------------------------------------------------------------------------

if nargin ~= 2
    fprintf('USAGE: [snr_mean, segsnr_mean]= comp_SNR(cleanFile, enhdFile) \n');
    % Assign the outputs so that a caller who requested return values gets
    % empties instead of an "output argument not assigned" error.
    snr_mean    = [];
    segsnr_mean = [];
    return;
end

[data1, Srate1, Nbits1] = read_audio_compat(cleanFile);
[data2, Srate2, Nbits2] = read_audio_compat(enhdFile);

% isequal is used for the bit depth because it may be empty when the
% reader cannot report it; "~=" on an empty value would break "||".
if (Srate1 ~= Srate2) || ~isequal(Nbits1, Nbits2)
    % error() with a single argument does not interpret escape sequences,
    % so the message is passed without a trailing "\n".
    error('comp_SNR:fileMismatch', 'The two files do not match!');
end

% Truncate both signals to the common length. Linear indexing is used, so
% for data stored one channel per column only the first channel is kept.
len   = min(length(data1), length(data2));
data1 = data1(1:len);
data2 = data2(1:len);

[snr_dist, segsnr_dist] = snr(data1, data2, Srate1);

snr_mean    = snr_dist;
segsnr_mean = mean(segsnr_dist);


% -------------------------------------------------------------------------
function [samples, srate, nbits] = read_audio_compat(fileName)
% READ_AUDIO_COMPAT  Read an audio file on both old and new MATLAB releases.
%
%   Uses audioread/audioinfo when available and falls back to the legacy
%   wavread otherwise. nbits is returned empty when audioinfo does not
%   report a BitsPerSample field for the file.

if exist('audioread', 'file') == 2
    [samples, srate] = audioread(fileName);
    info = audioinfo(fileName);
    if isfield(info, 'BitsPerSample')
        nbits = info.BitsPerSample;
    else
        nbits = [];
    end
else
    [samples, srate, nbits] = wavread(fileName);
end
|
|
|
|
|
|
% =========================================================================
|
|
function [overall_snr, segmental_snr] = snr(clean_speech, processed_speech, sample_rate)
% SNR  Overall and per-frame (segmental) SNR between two equal-length signals.
%
%   [overall_snr, segmental_snr] = snr(clean_speech, processed_speech, sample_rate)
%
%   clean_speech     - reference signal (vector; row or column)
%   processed_speech - signal under test, same number of samples
%   sample_rate      - sampling rate in Hz, used to size the 30 ms window
%
%   overall_snr      - 10*log10 of clean energy over error energy, computed
%                      over the whole signal (dB)
%   segmental_snr    - 1-by-N row vector with one clamped SNR value (dB)
%                      per analysis frame; empty (1-by-0) when the signal
%                      is shorter than one window
%
%   Frames are 30 ms long, advance by a quarter of the window length, and
%   are weighted by a Hanning-type window. Each frame value is limited to
%   the range [-10, 35] dB.
%
%   Raises an error if the two signals differ in length or if the sample
%   rate is too low to give a frame advance of at least one sample.

% ----------------------------------------------------------------------
% Normalise orientation. The analysis window below is a column vector;
% without this, row-vector input would be expanded into a matrix by the
% element-wise product on releases that support implicit expansion.
% ----------------------------------------------------------------------
clean_speech     = clean_speech(:);
processed_speech = processed_speech(:);

% ----------------------------------------------------------------------
% Check the length of the clean and processed speech. Must be the same.
% ----------------------------------------------------------------------
clean_length     = length(clean_speech);
processed_length = length(processed_speech);

if clean_length ~= processed_length
    % Raise a real error: returning here would leave both outputs
    % unassigned and fail later with a less helpful message.
    error('snr:lengthMismatch', 'Error: Both Speech Files must be same length.');
end

% ----------------------------------------------------------------------
% Overall SNR. DC removal and level matching of the two signals are
% intentionally not applied; the signals are compared as given.
% ----------------------------------------------------------------------
overall_snr = 10*log10(sum(clean_speech.^2) / ...
                       sum((clean_speech - processed_speech).^2));

% ----------------------------------------------------------------------
% Analysis parameters
% ----------------------------------------------------------------------
winlength = round(30*sample_rate/1000);   % window length in samples (30 ms)
skiprate  = floor(winlength/4);           % frame advance in samples
MIN_SNR   = -10;                          % minimum SNR in dB
MAX_SNR   = 35;                           % maximum SNR in dB

if skiprate < 1
    error('snr:sampleRateTooLow', ...
          'Sample rate %g Hz is too low for a 30 ms analysis window.', sample_rate);
end

% ----------------------------------------------------------------------
% For each frame of input speech, calculate the Segmental SNR
% ----------------------------------------------------------------------

% Same frame count as clean_length/skiprate - winlength/skiprate truncated
% toward zero, but computed on integers and never negative.
num_frames = max(0, floor((clean_length - winlength)/skiprate));

window = 0.5*(1 - cos(2*pi*(1:winlength)'/(winlength + 1)));

% Preallocate as a row vector (also guarantees the output is assigned
% when there are no frames at all).
segmental_snr = zeros(1, num_frames);

start = 1;                                % first sample of current frame
for frame_count = 1:num_frames

    % (1) Extract the frames of the reference and test signal and apply
    %     the analysis window.
    idx             = start:(start + winlength - 1);
    clean_frame     = clean_speech(idx).*window;
    processed_frame = processed_speech(idx).*window;

    % (2) Compute the frame SNR; eps keeps the ratio and the logarithm
    %     finite for silent or identical frames.
    signal_energy = sum(clean_frame.^2);
    noise_energy  = sum((clean_frame - processed_frame).^2);
    frame_snr     = 10*log10(signal_energy/(noise_energy + eps) + eps);

    % (3) Clamp to [MIN_SNR, MAX_SNR].
    segmental_snr(frame_count) = min(max(frame_snr, MIN_SNR), MAX_SNR);

    start = start + skiprate;
end
|
|
|