% dequantization_main.m (repository rajmic/parallel-dequantization, branch main)
% Dequantization of a signal from two parallel quantized observations
%
% parallel conversion of two branches giving observations y_1 and y_2
%
%          x
%       ___|___
%      |       |
%      B     Q_coarse
%     D_k      y_2
%    Q_fine
%     y_1
%
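% B is an anti-aliasing filter, D_k is downsampling by a factor of k,
% Q_fine is fine quantization, and Q_coarse is coarse quantization
%
% the PEMO-Q evaluation (audioqual) is currently disabled; the
% corresponding calls are commented out in the evaluation section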
%
% Vojtěch Kovanda
% Brno University of Technology, 2024


% using LTFAT toolbox
ltfatstart


%% input signal
% Reads the test recording, reduces it to a single channel if necessary and
% scales it so that its largest absolute sample equals 1.
audiofile = 'test/violin_scales/A_staccato.wav';

% the audio recordings are divided by instruments in the "test" folder

% for example
% 'test/violin_scales/A_staccato.wav'
% 'test/trumpet_scales/A#.wav'
% 'test/sax_tenor_scales/A#_legato.wav'
% 'test/sax_alto_scales/D#_staccato.wav'
% 'test/flute_scales/C_legato.wav'
% 'test/clarinet_scales/F_legato.wav'

[x, param.fs] = audioread(audiofile);

% audioread returns one column per channel, while the rest of this script
% (conv, scalar normalization) works on a single vector; mix multichannel
% recordings down to mono instead of failing later on
if size(x, 2) > 1
    warning('dequantization:multichannel', ...
        'Audio file "%s" has %d channels; averaging them into one.', ...
        audiofile, size(x, 2));
    x = mean(x, 2);
end

% an empty or all-zero recording cannot be normalized (division by zero
% would fill the signal with NaN), so stop with a clear message
if isempty(x) || ~any(x)
    error('dequantization:silentInput', ...
        'Audio file "%s" is empty or silent; cannot normalize.', audiofile);
end

% signal length
param.L = numel(x);

% normalization to peak absolute value 1
maxval = max(abs(x));
x = x/maxval;

%% generate observations y_1 and y_2
% Produces the two observations from the normalized signal x:
%   y1 - x filtered by B, quantized with w1 bits and downsampled by k
%   y2 - x quantized with w2 bits

% setting conversion parameters
param.w1 = 16;           % bit depth (bps) of Q_fine
param.w2 = 8;            % bit depth (bps) of Q_coarse
param.k = 4;             % downsampling factor

% load impulse response of B for downsampling factor k = 4 and sampling
% frequency f_s = 48kHz
% (loading into a struct keeps unrelated variables stored in the MAT-file
% out of the workspace and lets us report a missing "Num" explicitly)
filterFile = "filter_coeffs_6cutoff.mat";
filterData = load(filterFile);
if ~isfield(filterData, 'Num')
    error('dequantization:missingFilter', ...
        'File "%s" does not contain the variable "Num".', filterFile);
end

% the stored filter matches only one (k, f_s) combination; warn when the
% current settings differ, because B is then not the intended
% anti-aliasing filter
if param.k ~= 4 || param.fs ~= 48000
    warning('dequantization:filterMismatch', ...
        ['The filter in "%s" was designed for k = 4 and fs = 48000 Hz, ', ...
         'but k = %d and fs = %d Hz are in use.'], ...
        filterFile, param.k, param.fs);
end

param.B = filterData.Num;
param.Bt = flip(param.B);    % reversed impulse response

% first branch

% filtering (using convolution)
y1 = conv(x, param.B);

% signal length after filtering
param.L1 = length(y1);

% quantization
% NOTE(review): the header diagram places D_k before Q_fine, whereas here
% the signal is quantized first and downsampled afterwards; the two orders
% agree only if quant acts on each sample independently -- confirm in quant.
y1 = quant(y1, param.w1);

% downsampling: keep every k-th sample, then trim to floor(L1/k) samples
y1 = y1(1:param.k:end);
y1 = y1(1:floor(param.L1/param.k));

% second branch

% quantization
y2 = quant(x, param.w2);


%% settings for proposed algorithm (CVA)
% Collects the time-frequency frame and the solver parameters in param.

% frame settings
param.winlen = 2048;               % window length
param.wtype  = 'hann';             % window type
param.a      = param.winlen/4;     % window shift (a quarter of the window)
param.M      = 2*param.winlen;     % number of frequency channels

% frame construction, one step per line:
% real-valued DGT frame -> tight frame -> frame accelerated for length L
param.F = frame('dgtreal', {param.wtype, param.winlen}, param.a, param.M);
param.F = frametight(param.F);
param.F = frameaccel(param.F, param.L);

% algorithm parameters
% lam: different clipping thresholds for different bit depths of y_2
% NOTE(review): how the 13 entries are indexed is decided inside cv_alg --
% confirm there before changing param.w2 or the length of this vector.
param.lam = [ ...
    0.0012 0.0012 0.0012 0.0012 0.0012 ...
    0.0001 0.00005 0.00002 0.00001 ...
    0.000005 0.000001 0.0000005 0.0000001];
param.rho = 0.8;
param.tau = 1;
param.sig = 0.5;

% maximum number of iterations
param.maxit = 200;

%% calling optimization algorithm

% Run cv_alg (defined outside this script) on the two observations y1 and y2
% with the settings collected in param. The normalized original x is passed
% as well; NOTE(review): presumably it serves only to compute the SDR values
% returned in SDR_t -- confirm in cv_alg.
% Outputs: xhat is the reconstructed signal; SDR_t holds the SDR values that
% the evaluation section maximizes and plots against the iteration number.
[xhat, SDR_t] = cv_alg(y1, y2, param, x);

%% evaluation
% Reports the SDR of the coarse observation y2 and of the reconstruction,
% both measured against the normalized original x, and plots SDR_t.

% SDR of reconstructed signal, SDR(xhat, x):
% largest value in SDR_t and the position (bestit) where it occurs
[SDR, bestit] = max(SDR_t);

% SDR of quantized signal, SDR(y2, x)
% (norm defaults to the 2-norm)
SDRq = 20*log10(norm(x)/norm(x - y2));

% ODG of reconstructed signal, ODG(xhat, x) -- PEMO-Q call kept disabled
% [~, ~, ODG] = audioqual(x, xhat, param.fs);

% ODG of quantized signal, ODG(y2, x) -- PEMO-Q call kept disabled
% [~, ~, ODGq] = audioqual(x, y2, param.fs);

% print the results
fprintf('SDR of the quantized signal is %4.3f dB.\n', SDRq);
fprintf('SDR of the reconstructed signal is %4.3f dB.\n', SDR);
% fprintf('ODG of the quantized signal is %4.3f.\n', ODGq);
% fprintf('ODG of the reconstructed signal is %4.3f.\n', ODG);

% plot the SDR values stored in SDR_t
figure;
plot(SDR_t);
xlabel('number of iteration');
ylabel('SDR (dB)');