function pesq_mos= pesq_psychoacoustic_model (ref_data, ref_Nsamples, deg_data, ... deg_Nsamples ) global CALIBRATE Nfmax Nb Sl Sp global nr_of_hz_bands_per_bark_band centre_of_band_bark global width_of_band_hz centre_of_band_hz width_of_band_bark global pow_dens_correction_factor abs_thresh_power global Downsample SEARCHBUFFER DATAPADDING_MSECS Fs Nutterances global Utt_Start Utt_End Utt_Delay NUMBER_OF_PSQM_FRAMES_PER_SYLLABE global Fs Plot_Frame % Plot_Frame= 75; % this is the frame whose spectrum will be plotted FALSE= 0; TRUE= 1; NUMBER_OF_PSQM_FRAMES_PER_SYLLABE= 20; maxNsamples = max (ref_Nsamples, deg_Nsamples); Nf = Downsample * 8; MAX_NUMBER_OF_BAD_INTERVALS = 1000; start_frame_of_bad_interval= zeros( 1, MAX_NUMBER_OF_BAD_INTERVALS); stop_frame_of_bad_interval= zeros( 1, MAX_NUMBER_OF_BAD_INTERVALS); start_sample_of_bad_interval= zeros( 1, MAX_NUMBER_OF_BAD_INTERVALS); stop_sample_of_bad_interval= zeros( 1, MAX_NUMBER_OF_BAD_INTERVALS); number_of_samples_in_bad_interval= zeros( 1, MAX_NUMBER_OF_BAD_INTERVALS); delay_in_samples_in_bad_interval= zeros( 1, MAX_NUMBER_OF_BAD_INTERVALS); number_of_bad_intervals= 0; there_is_a_bad_frame= FALSE; Whanning= hann( Nf, 'periodic'); Whanning= Whanning'; D_POW_F = 2; D_POW_S = 6; D_POW_T = 2; A_POW_F = 1; A_POW_S = 6; A_POW_T = 2; D_WEIGHT= 0.1; A_WEIGHT= 0.0309; CRITERIUM_FOR_SILENCE_OF_5_SAMPLES = 500; samples_to_skip_at_start = 0; sum_of_5_samples= 0; while ((sum_of_5_samples< CRITERIUM_FOR_SILENCE_OF_5_SAMPLES) ... && (samples_to_skip_at_start < maxNsamples / 2)) sum_of_5_samples= sum( abs( ref_data( samples_to_skip_at_start... + SEARCHBUFFER * Downsample + 1: samples_to_skip_at_start... + SEARCHBUFFER * Downsample + 5))); if (sum_of_5_samples< CRITERIUM_FOR_SILENCE_OF_5_SAMPLES) samples_to_skip_at_start = samples_to_skip_at_start+ 1; end end % fprintf( 'samples_to_skip_at_start is %d\n', samples_to_skip_at_start); samples_to_skip_at_end = 0; sum_of_5_samples= 0; while ((sum_of_5_samples< CRITERIUM_FOR_SILENCE_OF_5_SAMPLES) ... && (samples_to_skip_at_end < maxNsamples / 2)) sum_of_5_samples= sum( abs( ref_data( maxNsamples - ... SEARCHBUFFER* Downsample + DATAPADDING_MSECS* (Fs/ 1000) ... - samples_to_skip_at_end - 4: maxNsamples - ... SEARCHBUFFER* Downsample + DATAPADDING_MSECS* (Fs/ 1000) ... - samples_to_skip_at_end))); if (sum_of_5_samples< CRITERIUM_FOR_SILENCE_OF_5_SAMPLES) samples_to_skip_at_end = samples_to_skip_at_end+ 1; end end % fprintf( 'samples_to_skip_at_end is %d\n', samples_to_skip_at_end); start_frame = floor( samples_to_skip_at_start/ (Nf/ 2)); stop_frame = floor( (maxNsamples- 2* SEARCHBUFFER* Downsample ... + DATAPADDING_MSECS* (Fs/ 1000)- samples_to_skip_at_end) ... / (Nf/ 2))- 1; % number of frames in speech data plus DATAPADDING_MSECS % fprintf( 'start/end frame is %d/%d\n', start_frame, stop_frame); D_disturbance= zeros( stop_frame+ 1, Nb); DA_disturbance= zeros( stop_frame+ 1, Nb); power_ref = pow_of (ref_data, SEARCHBUFFER* Downsample, ... maxNsamples- SEARCHBUFFER* Downsample+ DATAPADDING_MSECS* (Fs/ 1000),... maxNsamples- 2* SEARCHBUFFER* Downsample+ DATAPADDING_MSECS* (Fs/ 1000)); power_deg = pow_of (deg_data, SEARCHBUFFER * Downsample, ... maxNsamples- SEARCHBUFFER* Downsample+ DATAPADDING_MSECS* (Fs/ 1000),... maxNsamples- 2* SEARCHBUFFER* Downsample+ DATAPADDING_MSECS* (Fs/ 1000)); % fprintf( 'ref/deg power is %f/%f\n', power_ref, power_deg); hz_spectrum_ref = zeros( 1, Nf/ 2); hz_spectrum_deg = zeros( 1, Nf/ 2); frame_is_bad = zeros( 1, stop_frame + 1); smeared_frame_is_bad = zeros( 1, stop_frame + 1); silent = zeros( 1, stop_frame + 1); pitch_pow_dens_ref = zeros( stop_frame + 1, Nb); pitch_pow_dens_deg = zeros( stop_frame + 1, Nb); frame_was_skipped = zeros( 1, stop_frame + 1); frame_disturbance = zeros( 1, stop_frame + 1); frame_disturbance_asym_add = zeros( 1, stop_frame + 1); avg_pitch_pow_dens_ref = zeros( 1, Nb); avg_pitch_pow_dens_deg = zeros( 1, Nb); loudness_dens_ref = zeros( 1, Nb); loudness_dens_deg = zeros( 1, Nb); deadzone = zeros( 1, Nb); disturbance_dens = zeros( 1, Nb); disturbance_dens_asym_add = zeros( 1, Nb); time_weight = zeros( 1, stop_frame + 1); total_power_ref = zeros( 1, stop_frame + 1); % fid= fopen( 'tmp_mat.txt', 'wt'); for frame = 0: stop_frame start_sample_ref = 1+ SEARCHBUFFER * Downsample + frame* (Nf/ 2); hz_spectrum_ref= short_term_fft (Nf, ref_data, Whanning, ... start_sample_ref); utt = Nutterances; while ((utt >= 1) && ((Utt_Start(utt)- 1)* Downsample+ 1 ... > start_sample_ref)) utt= utt - 1; end if (utt >= 1) delay = Utt_Delay(utt); else delay = Utt_Delay(1); end start_sample_deg = start_sample_ref + delay; if ((start_sample_deg > 0) && (start_sample_deg + Nf- 1 < ... maxNsamples+ DATAPADDING_MSECS* (Fs/ 1000))) hz_spectrum_deg= short_term_fft (Nf, deg_data, Whanning, ... start_sample_deg); else hz_spectrum_deg( 1: Nf/ 2)= 0; end pitch_pow_dens_ref( frame+ 1, :)= freq_warping (... hz_spectrum_ref, Nb, frame); %peak = maximum_of (pitch_pow_dens_ref, 0, Nb); pitch_pow_dens_deg( frame+ 1, :)= freq_warping (... hz_spectrum_deg, Nb, frame); total_audible_pow_ref = total_audible (frame, pitch_pow_dens_ref, 1E2); total_audible_pow_deg = total_audible (frame, pitch_pow_dens_deg, 1E2); silent(frame+ 1) = (total_audible_pow_ref < 1E7); end % fclose( fid); avg_pitch_pow_dens_ref= time_avg_audible_of (stop_frame + 1, ... silent, pitch_pow_dens_ref, floor((maxNsamples- 2* SEARCHBUFFER* ... Downsample+ DATAPADDING_MSECS* (Fs/ 1000))/ (Nf / 2))- 1); avg_pitch_pow_dens_deg= time_avg_audible_of (stop_frame + 1, ... silent, pitch_pow_dens_deg, floor((maxNsamples- 2* SEARCHBUFFER* ... Downsample+ DATAPADDING_MSECS* (Fs/ 1000))/ (Nf/ 2))- 1); % fid= fopen( 'tmp_mat.txt', 'wt'); % fprintf( fid, '%f\n', avg_pitch_pow_dens_deg); % fclose( fid); if (CALIBRATE== 0) pitch_pow_dens_ref= freq_resp_compensation (stop_frame + 1, ... pitch_pow_dens_ref, avg_pitch_pow_dens_ref, ... avg_pitch_pow_dens_deg, 1000); if (Plot_Frame>= 0) % plot pitch_pow_dens_ref figure; subplot( 1, 2, 1); plot( centre_of_band_hz, 10* log10( eps+ ... pitch_pow_dens_ref( Plot_Frame+ 1, :))); axis( [0 Fs/2 0 95]); %xlabel( 'Hz'); ylabel( 'Db'); title( 'reference signal bark spectrum with frequency compensation'); subplot( 1, 2, 2); plot( centre_of_band_hz, 10* log10( eps+ ... pitch_pow_dens_deg( Plot_Frame+ 1, :))); axis( [0 Fs/2 0 95]); %xlabel( 'Hz'); ylabel( 'Db'); title( 'degraded signal bark spectrum'); end end % tmp1= pitch_pow_dens_ref'; MAX_SCALE = 5.0; MIN_SCALE = 3e-4; oldScale = 1; THRESHOLD_BAD_FRAMES = 30; for frame = 0: stop_frame total_audible_pow_ref = total_audible (frame, pitch_pow_dens_ref, 1); total_audible_pow_deg = total_audible (frame, pitch_pow_dens_deg, 1); total_power_ref (1+ frame) = total_audible_pow_ref; scale = (total_audible_pow_ref + 5e3)/ (total_audible_pow_deg + 5e3); if (frame > 0) scale = 0.2 * oldScale + 0.8 * scale; end oldScale = scale; if (scale > MAX_SCALE) scale = MAX_SCALE; elseif (scale < MIN_SCALE) scale = MIN_SCALE; end pitch_pow_dens_deg( 1+ frame, :) = ... pitch_pow_dens_deg( 1+ frame, :) * scale; if (frame== Plot_Frame) figure; subplot( 1, 2, 1); plot( centre_of_band_hz, 10* log10( eps+ ... pitch_pow_dens_ref( Plot_Frame+ 1, :))); axis( [0 Fs/2 0 95]); %xlabel( 'Hz'); ylabel( 'Db'); subplot( 1, 2, 2); plot( centre_of_band_hz, 10* log10( eps+ ... pitch_pow_dens_deg( Plot_Frame+ 1, :))); axis( [0 Fs/2 0 95]); %xlabel( 'Hz'); ylabel( 'Db'); end loudness_dens_ref = intensity_warping_of (frame, pitch_pow_dens_ref); loudness_dens_deg = intensity_warping_of (frame, pitch_pow_dens_deg); disturbance_dens = loudness_dens_deg - loudness_dens_ref; if (frame== Plot_Frame) figure; subplot( 1, 2, 1); plot( centre_of_band_hz, 10* log10( eps+ ... loudness_dens_ref)); axis( [0 Fs/2 0 15]); %xlabel( 'Hz'); ylabel( 'Db'); title( 'reference signal loudness density'); subplot( 1, 2, 2); plot( centre_of_band_hz, 10* log10( eps+ ... loudness_dens_deg)); axis( [0 Fs/2 0 15]); %xlabel( 'Hz'); ylabel( 'Db'); title( 'degraded signal loudness density'); end for band =1: Nb deadzone (band) = 0.25* min (loudness_dens_deg (band), ... loudness_dens_ref (band)); end for band = 1: Nb d = disturbance_dens (band); m = deadzone (band); if (d > m) disturbance_dens (band) = disturbance_dens (band)- m; % disturbance_dens (band) = d- m; else if (d < -m) disturbance_dens (band) = disturbance_dens (band)+ m; % disturbance_dens (band) = d+ m; else disturbance_dens (band) = 0; end end end if (frame== Plot_Frame) figure; subplot( 1, 2, 1); plot( centre_of_band_hz, disturbance_dens); axis( [0 Fs/2 -1 50]); %xlabel( 'Hz'); ylabel( 'Db'); title( 'disturbance'); end D_disturbance( frame+ 1, :)= disturbance_dens; frame_disturbance (1+ frame) = pseudo_Lp (disturbance_dens, D_POW_F); if (frame_disturbance (1+ frame) > THRESHOLD_BAD_FRAMES) there_is_a_bad_frame = TRUE; end disturbance_dens= multiply_with_asymmetry_factor (... disturbance_dens, frame, pitch_pow_dens_ref, pitch_pow_dens_deg); if (frame== Plot_Frame) subplot( 1, 2, 2); plot( centre_of_band_hz, disturbance_dens); axis( [0 Fs/2 -1 50]); %xlabel( 'Hz'); ylabel( 'Db'); title( 'disturbance after asymmetry processing'); end DA_disturbance( frame+ 1, :)= disturbance_dens; frame_disturbance_asym_add (1+ frame) = ... pseudo_Lp (disturbance_dens, A_POW_F); end % fid= fopen( 'tmp_mat.txt', 'wt'); % fprintf( fid, '%f\n', frame_disturbance); % fclose( fid); frame_was_skipped (1: 1+ stop_frame) = FALSE; for utt = 2: Nutterances frame1 = floor (((Utt_Start(utt)- 1- SEARCHBUFFER )* Downsample+ 1+ ... Utt_Delay(utt))/ (Nf/ 2)); j = floor( floor(((Utt_End(utt-1)- 1- SEARCHBUFFER)* Downsample+ 1+ ... Utt_Delay(utt-1)))/(Nf/ 2)); delay_jump = Utt_Delay(utt) - Utt_Delay(utt-1); if (frame1 > j) frame1 = j; elseif (frame1 < 0) frame1 = 0; end % fprintf( 'frame1, j, delay_jump is %d, %d, %d\n', frame1, ... % j, delay_jump); if (delay_jump < -(Nf/ 2)) frame2 = floor (((Utt_Start(utt)- 1- SEARCHBUFFER)* Downsample+ 1 ... + max (0, abs (delay_jump)))/ (Nf/ 2)) + 1; for frame = frame1: frame2 if (frame < stop_frame) frame_was_skipped (1+ frame) = TRUE; frame_disturbance (1+ frame) = 0; frame_disturbance_asym_add (1+ frame) = 0; end end end end nn = DATAPADDING_MSECS* (Fs/ 1000) + maxNsamples; tweaked_deg = zeros( 1, nn); % fprintf( 'nn is %d\n', nn); for i= SEARCHBUFFER* Downsample+ 1: nn- SEARCHBUFFER* Downsample utt = Nutterances; while ((utt >= 1) && ((Utt_Start (utt)- 1)* Downsample> i)) utt = utt- 1; end if (utt >= 1) delay = Utt_Delay (utt); else delay = Utt_Delay (1); end j = i + delay; if (j < SEARCHBUFFER * Downsample+ 1) j = SEARCHBUFFER * Downsample+ 1; end if (j > nn - SEARCHBUFFER * Downsample) j = nn - SEARCHBUFFER * Downsample; end tweaked_deg (i) = deg_data (j); end if (there_is_a_bad_frame) for frame = 0: stop_frame frame_is_bad (1+ frame) = (frame_disturbance (1+ frame)... > THRESHOLD_BAD_FRAMES); smeared_frame_is_bad (1+ frame) = FALSE; end frame_is_bad (1) = FALSE; SMEAR_RANGE = 2; for frame = SMEAR_RANGE: stop_frame- 1- SMEAR_RANGE max_itself_and_left = frame_is_bad (1+ frame); max_itself_and_right = frame_is_bad (1+ frame); for i = -SMEAR_RANGE: 0 if (max_itself_and_left < frame_is_bad (1+ frame+ i)) max_itself_and_left = frame_is_bad (1+ frame+ i); end end for i = 0: SMEAR_RANGE if (max_itself_and_right < frame_is_bad (1+ frame + i)) max_itself_and_right = frame_is_bad (1+ frame + i); end end mini = max_itself_and_left; if (mini > max_itself_and_right) mini = max_itself_and_right; end smeared_frame_is_bad (1+ frame) = mini; end MINIMUM_NUMBER_OF_BAD_FRAMES_IN_BAD_INTERVAL = 5; number_of_bad_intervals = 0; frame = 0; while (frame <= stop_frame) while ((frame <= stop_frame) && (~smeared_frame_is_bad (1+ frame))) frame= frame+ 1; end if (frame <= stop_frame) start_frame_of_bad_interval(1+ number_of_bad_intervals)= ... 1+ frame; while ((frame <= stop_frame) && (... smeared_frame_is_bad (1+ frame))) frame= frame+ 1; end if (frame <= stop_frame) stop_frame_of_bad_interval(1+ number_of_bad_intervals)= ... 1+ frame; if (stop_frame_of_bad_interval(1+ number_of_bad_intervals)- ... start_frame_of_bad_interval(1+ number_of_bad_intervals)... >= MINIMUM_NUMBER_OF_BAD_FRAMES_IN_BAD_INTERVAL) number_of_bad_intervals= number_of_bad_intervals+ 1; end end end end for bad_interval = 0: number_of_bad_intervals - 1 start_sample_of_bad_interval(1+ bad_interval) = ... (start_frame_of_bad_interval(1+ bad_interval)- 1) * (Nf/ 2) ... + SEARCHBUFFER * Downsample+ 1; stop_sample_of_bad_interval(1+ bad_interval) = ... (stop_frame_of_bad_interval(1+ bad_interval)- 1) * (Nf/ 2) ... + Nf + SEARCHBUFFER* Downsample; if (stop_frame_of_bad_interval(1+ bad_interval) > stop_frame+ 1) stop_frame_of_bad_interval(1+ bad_interval) = stop_frame+ 1; end number_of_samples_in_bad_interval(1+ bad_interval) = ... stop_sample_of_bad_interval(1+ bad_interval) - ... start_sample_of_bad_interval(1+ bad_interval)+ 1; end % fprintf( 'number of bad intervals %d\n', number_of_bad_intervals); % fprintf( '%d %d\n', number_of_samples_in_bad_interval(1), ... % number_of_samples_in_bad_interval(2)); % fprintf( '%d %d\n', start_sample_of_bad_interval(1), ... % start_sample_of_bad_interval(2)); SEARCH_RANGE_IN_TRANSFORM_LENGTH = 4; search_range_in_samples= SEARCH_RANGE_IN_TRANSFORM_LENGTH * Nf; for bad_interval= 0: number_of_bad_intervals- 1 ref = zeros (1, 2 * search_range_in_samples + ... number_of_samples_in_bad_interval (1+ bad_interval)); deg = zeros (1, 2 * search_range_in_samples + ... number_of_samples_in_bad_interval (1+ bad_interval)); ref(1: search_range_in_samples) = 0; ref (search_range_in_samples+ 1: search_range_in_samples+ ... number_of_samples_in_bad_interval (1+ bad_interval)) = ... ref_data (start_sample_of_bad_interval( 1+ bad_interval) + 1: ... start_sample_of_bad_interval( 1+ bad_interval) + ... number_of_samples_in_bad_interval (1+ bad_interval)); ref (search_range_in_samples + ... number_of_samples_in_bad_interval (1+ bad_interval) + 1: ... search_range_in_samples + ... number_of_samples_in_bad_interval (1+ bad_interval) + ... search_range_in_samples) = 0; for i = 0: 2 * search_range_in_samples + ... number_of_samples_in_bad_interval (1+ bad_interval) - 1 j = start_sample_of_bad_interval (1+ bad_interval) - ... search_range_in_samples + i; nn = maxNsamples - SEARCHBUFFER * Downsample + ... DATAPADDING_MSECS * (Fs / 1000); if (j <= SEARCHBUFFER * Downsample) j = SEARCHBUFFER * Downsample+ 1; end if (j > nn) j = nn; end deg (1+ i) = tweaked_deg (j); end [delay_in_samples, best_correlation]= compute_delay ... (1, 2 * search_range_in_samples + ... number_of_samples_in_bad_interval (1+ bad_interval), ... search_range_in_samples, ref, deg); delay_in_samples_in_bad_interval (1+ bad_interval) = ... delay_in_samples; % fprintf( 'delay_in_samples, best_correlation is \n\t%d, %f\n', ... % delay_in_samples, best_correlation); % if (best_correlation < 0.5) delay_in_samples_in_bad_interval (1+ bad_interval) = 0; end end if (number_of_bad_intervals > 0) doubly_tweaked_deg = tweaked_deg( 1: maxNsamples + ... DATAPADDING_MSECS * (Fs / 1000)); for bad_interval= 0: number_of_bad_intervals- 1 delay = delay_in_samples_in_bad_interval (1+ bad_interval); for i = start_sample_of_bad_interval (1+ bad_interval): ... stop_sample_of_bad_interval (1+ bad_interval) j = i + delay; if (j < 1) j = 1; end if (j > maxNsamples) j = maxNsamples; end h = tweaked_deg (j); doubly_tweaked_deg (i) = h; end end untweaked_deg = deg_data; deg_data = doubly_tweaked_deg; for bad_interval= 0: number_of_bad_intervals- 1 for frame = start_frame_of_bad_interval (1+ bad_interval): ... stop_frame_of_bad_interval (1+ bad_interval)- 1 frame= frame- 1; start_sample_ref = SEARCHBUFFER * Downsample + ... frame * Nf / 2+ 1; start_sample_deg = start_sample_ref; hz_spectrum_deg= short_term_fft (Nf, deg_data, ... Whanning, start_sample_deg); pitch_pow_dens_deg( 1+ frame, :)= freq_warping (... hz_spectrum_deg, Nb, frame); end oldScale = 1; for frame = start_frame_of_bad_interval (1+ bad_interval): ... stop_frame_of_bad_interval (1+ bad_interval)- 1 frame= frame- 1; % see implementation for detail why 1 needed to be % subtracted total_audible_pow_ref = total_audible (frame, ... pitch_pow_dens_ref, 1); total_audible_pow_deg = total_audible (frame, ... pitch_pow_dens_deg, 1); scale = (total_audible_pow_ref + 5e3) / ... (total_audible_pow_deg + 5e3); if (frame > 0) scale = 0.2 * oldScale + 0.8*scale; end oldScale = scale; if (scale > MAX_SCALE) scale = MAX_SCALE; end if (scale < MIN_SCALE) scale = MIN_SCALE; end pitch_pow_dens_deg (1+ frame, :) = ... pitch_pow_dens_deg (1+ frame, :)* scale; loudness_dens_ref= intensity_warping_of (frame, ... pitch_pow_dens_ref); loudness_dens_deg= intensity_warping_of (frame, ... pitch_pow_dens_deg); disturbance_dens = loudness_dens_deg - loudness_dens_ref; for band = 1: Nb deadzone(band) = min (loudness_dens_deg(band), ... loudness_dens_ref(band)); deadzone(band) = deadzone(band)* 0.25; end for band = 1: Nb d = disturbance_dens (band); m = deadzone (band); if (d > m) disturbance_dens (band) = ... disturbance_dens (band)- m; else if (d < -m) disturbance_dens (band) = ... disturbance_dens (band)+ m; else disturbance_dens (band) = 0; end end end frame_disturbance( 1+ frame) = min (... frame_disturbance( 1+ frame), pseudo_Lp(... disturbance_dens, D_POW_F)); disturbance_dens= multiply_with_asymmetry_factor ... (disturbance_dens, frame, pitch_pow_dens_ref, ... pitch_pow_dens_deg); frame_disturbance_asym_add(1+ frame) = min (... frame_disturbance_asym_add(1+ frame), ... pseudo_Lp (disturbance_dens, A_POW_F)); end end deg_data = untweaked_deg; end end for frame = 0: stop_frame h = 1; if (stop_frame + 1 > 1000) n = floor( (maxNsamples - 2 * SEARCHBUFFER * Downsample)... / (Nf / 2)) - 1; timeWeightFactor = (n - 1000) / 5500; if (timeWeightFactor > 0.5) timeWeightFactor = 0.5; end h = (1.0 - timeWeightFactor) + timeWeightFactor * frame / n; end time_weight (1 +frame) = h; end % fid= fopen( 'tmp_mat1.txt', 'at'); % fprintf( '\n'); for frame = 0: stop_frame h = ((total_power_ref (1+ frame) + 1e5) / 1e7)^ 0.04; % if (frame== 118) % fprintf( '%f\n', h); % fprintf( '%f\n', frame_disturbance( 1+ frame)); % end frame_disturbance( 1+ frame) = frame_disturbance( 1+ frame)/ h; % if (frame== 118) % fprintf( '%f\n', frame_disturbance( 1+ frame)); % end % frame_disturbance_asym_add( 1+ frame) = ... frame_disturbance_asym_add( 1+ frame)/ h; if (frame_disturbance( 1+ frame) > 45) frame_disturbance( 1+ frame) = 45; end if (frame_disturbance_asym_add( 1+ frame)> 45) frame_disturbance_asym_add( 1+ frame) = 45; end end % fclose ( fid); d_indicator = Lpq_weight (start_frame, stop_frame, ... D_POW_S, D_POW_T, frame_disturbance, time_weight); a_indicator = Lpq_weight (start_frame, stop_frame, ... A_POW_S, A_POW_T, frame_disturbance_asym_add, time_weight); pesq_mos = 4.5 - D_WEIGHT * d_indicator - A_WEIGHT * a_indicator; if (Plot_Frame> 0) figure; subplot( 1, 2, 1); mesh( 0: stop_frame, centre_of_band_hz, D_disturbance'); title( 'disturbance'); subplot( 1, 2, 2); mesh( 0: stop_frame, centre_of_band_hz, DA_disturbance'); title( 'disturbance after asymmetry processing'); end % fid= fopen( 'tmp_mat.txt', 'wt'); % fprintf( fid, 'time_weight\n'); % fprintf( fid, '%f\n', time_weight); % fprintf( fid, 'frame_disturbance:\n'); % fprintf( fid, '%f\n', frame_disturbance); % fprintf( fid, 'frame_disturbance_asym_add\n'); % fprintf( fid, '%f\n', frame_disturbance_asym_add); % fclose( fid); function result_time= Lpq_weight(start_frame, stop_frame, ... power_syllable, power_time, frame_disturbance, time_weight) global NUMBER_OF_PSQM_FRAMES_PER_SYLLABE % fid= fopen( 'tmp_mat1.txt', 'at'); % fprintf( 'result_time:\n'); result_time= 0; total_time_weight_time = 0; % fprintf( 'start/end frame: %d/%d\n', start_frame, stop_frame); for start_frame_of_syllable = start_frame: ... NUMBER_OF_PSQM_FRAMES_PER_SYLLABE/2: stop_frame result_syllable = 0; count_syllable = 0; for frame = start_frame_of_syllable: ... start_frame_of_syllable + NUMBER_OF_PSQM_FRAMES_PER_SYLLABE- 1 if (frame <= stop_frame) h = frame_disturbance(1+ frame); % if (start_frame_of_syllable== 101) % fprintf( fid, '%f\n', h); % end result_syllable = result_syllable+ (h^ power_syllable); end count_syllable = count_syllable+ 1; end result_syllable = result_syllable/ count_syllable; result_syllable = result_syllable^ (1/power_syllable); result_time= result_time+ (time_weight (... 1+ start_frame_of_syllable - start_frame) * ... result_syllable)^ power_time; total_time_weight_time = total_time_weight_time+ ... time_weight (1+ start_frame_of_syllable - start_frame)^ power_time; % fprintf( fid, '%f\n', result_time); end % fclose (fid); % fprintf( 'total_time_weight_time is %f\n', total_time_weight_time); result_time = result_time/ total_time_weight_time; result_time= result_time^ (1/ power_time); % fprintf( 'result_time is %f\n\n', result_time); function [best_delay, max_correlation] = compute_delay (... start_sample, stop_sample, search_range, ... time_series1, time_series2) n = stop_sample - start_sample+ 1; power_of_2 = 2^ (ceil( log2( 2 * n))); power1 = pow_of (time_series1, start_sample, stop_sample, n)* ... n/ power_of_2; power2 = pow_of (time_series2, start_sample, stop_sample, n)* ... n/ power_of_2; normalization = sqrt (power1 * power2); % fprintf( 'normalization is %f\n', normalization); if ((power1 <= 1e-6) || (power2 <= 1e-6)) max_correlation = 0; best_delay= 0; end x1( 1: power_of_2)= 0; x2( 1: power_of_2)= 0; y( 1: power_of_2)= 0; x1( 1: n)= abs( time_series1( start_sample: ... stop_sample)); x2( 1: n)= abs( time_series2( start_sample: ... stop_sample)); x1_fft= fft( x1, power_of_2)/ power_of_2; x2_fft= fft( x2, power_of_2); x1_fft_conj= conj( x1_fft); y= ifft( x1_fft_conj.* x2_fft, power_of_2); best_delay = 0; max_correlation = 0; % these loop can be rewritten for i = -search_range: -1 h = abs (y (1+ i + power_of_2)) / normalization; if (h > max_correlation) max_correlation = h; best_delay= i; end end for i = 0: search_range- 1 h = abs (y (1+i)) / normalization; if (h > max_correlation) max_correlation = h; best_delay= i; end end best_delay= best_delay- 1; function mod_disturbance_dens= multiply_with_asymmetry_factor (... disturbance_dens, frame, pitch_pow_dens_ref, pitch_pow_dens_deg) global Nb for i = 1: Nb ratio = (pitch_pow_dens_deg(1+ frame, i) + 50)... / (pitch_pow_dens_ref (1+ frame, i) + 50); h = ratio^ 1.2; if (h > 12) h = 12; elseif (h < 3) h = 0.0; end mod_disturbance_dens (i) = disturbance_dens (i) * h; end function loudness_dens = intensity_warping_of (... frame, pitch_pow_dens) global abs_thresh_power Sl Nb centre_of_band_bark ZWICKER_POWER= 0.23; for band = 1: Nb threshold = abs_thresh_power (band); input = pitch_pow_dens (1+ frame, band); if (centre_of_band_bark (band) < 4) h = 6 / (centre_of_band_bark (band) + 2); else h = 1; end if (h > 2) h = 2; end h = h^ 0.15; modified_zwicker_power = ZWICKER_POWER * h; if (input > threshold) loudness_dens (band) = ((threshold / 0.5)^ modified_zwicker_power)... * ((0.5 + 0.5 * input / threshold)^ modified_zwicker_power- 1); else loudness_dens (band) = 0; end loudness_dens (band) = loudness_dens (band)* Sl; end function result= pseudo_Lp (x, p) global Nb width_of_band_bark totalWeight = 0; result = 0; for band = 2: Nb h = abs (x (band)); w = width_of_band_bark (band); prod = h * w; result = result+ prod^ p; totalWeight = totalWeight+ w; end result = (result/ totalWeight)^ (1/p); result = result* totalWeight; function mod_pitch_pow_dens_ref= freq_resp_compensation (number_of_frames, ... pitch_pow_dens_ref, avg_pitch_pow_dens_ref, ... avg_pitch_pow_dens_deg, constant) global Nb for band = 1: Nb x = (avg_pitch_pow_dens_deg (band) + constant) / ... (avg_pitch_pow_dens_ref (band) + constant); if (x > 100.0) x = 100.0; elseif (x < 0.01) x = 0.01; end for frame = 1: number_of_frames mod_pitch_pow_dens_ref(frame, band) = ... pitch_pow_dens_ref(frame, band) * x; end end function avg_pitch_pow_dens= time_avg_audible_of(number_of_frames, ... silent, pitch_pow_dens, total_number_of_frames) global Nb abs_thresh_power for band = 1: Nb result = 0; for frame = 1: number_of_frames if (~silent (frame)) h = pitch_pow_dens (frame, band); if (h > 100 * abs_thresh_power (band)) result = result + h; end end avg_pitch_pow_dens (band) = result/ total_number_of_frames; end end function hz_spectrum= short_term_fft (Nf, data, Whanning, start_sample) x1= data( start_sample: start_sample+ Nf-1).* Whanning; x1_fft= fft( x1); hz_spectrum= abs( x1_fft( 1: Nf/ 2)).^ 2; hz_spectrum( 1)= 0; function pitch_pow_dens= freq_warping( hz_spectrum, Nb, frame) global nr_of_hz_bands_per_bark_band pow_dens_correction_factor global Sp hz_band = 1; for bark_band = 1: Nb n = nr_of_hz_bands_per_bark_band (bark_band); sum = 0; for i = 1: n sum = sum+ hz_spectrum( hz_band); hz_band= hz_band+ 1; end sum = sum* pow_dens_correction_factor (bark_band); sum = sum* Sp; pitch_pow_dens (bark_band) = sum; end function total_audible_pow = total_audible (frame, ... pitch_pow_dens, factor) global Nb abs_thresh_power total_audible_pow = 0; for band= 2: Nb h = pitch_pow_dens (frame+ 1,band); threshold = factor * abs_thresh_power (band); if (h > threshold) total_audible_pow = total_audible_pow+ h; end end