% STK_SAMPCRIT_AKG_EVAL computes the Approximate KG criterion
%
% CALL: AKG = stk_sampcrit_akg_eval (ZC_MEAN, ZC_STD, ZR_MEAN, ZR_STD, ZCR_COV)
%
%    computes the value AKG of the Approximate KG criterion for a set of
%    candidate points, with respect to a certain reference grid.  The
%    predictive distribution of the objective function (to be minimized) at
%    the candidate and reference points is assumed to be jointly Gaussian,
%    with mean ZC_MEAN and standard deviation ZC_STD at the candidate points,
%    mean ZR_MEAN and standard deviation ZR_STD at the reference points, and
%    covariance matrix ZCR_COV between the candidate and reference points.
%    The input arguments must have the following sizes:
%
%       * ZC_MEAN    M x 1,
%       * ZC_STD     M x 1,
%       * ZR_MEAN    L x 1,
%       * ZR_STD     L x 1,
%       * ZCR_COV    M x L,
%
%    where M is the number of candidate points and L the number of reference
%    points.  The output has size M x 1.
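%
%    As an illustration (a minimal sketch with purely made-up numbers, not
%    tied to any particular model), the criterion can be called directly on
%    predictive quantities of the appropriate sizes, here with M = 2 candidate
%    points and L = 3 reference points:
%
%       zc_mean = [0.0; 0.5];         zc_std = [1.0; 0.8];
%       zr_mean = [0.2; 0.1; 0.4];    zr_std = [0.5; 0.6; 0.7];
%       zcr_cov = 0.1 * ones (2, 3);  % candidate/reference covariances
%       AKG = stk_sampcrit_akg_eval (zc_mean, zc_std, zr_mean, zr_std, zcr_cov)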
%
% NOTE ABOUT THE "KNOWLEDGE GRADIENT" CRITERION
%
%    The "Knowlegde Gradient" (KG) criterion is the one-step look-ahead (a.k.a
%    myopic) sampling criterion associated to the problem of estimating the
%    minimizer of the objective function under the L^1 loss (equivalently,
%    under the linear loss/utility).
%
%    This sampling strategy was first proposed in the work of Mockus and
%    co-authors in the 1970s (see [1] and refs therein), for the case of
%    noiseless evaluations, but was only applied to particular Brownian-like
%    processes for which the minimum of the posterior mean coincides with the
%    best evaluation so far (in which case the KG criterion coincides with the
%    EI criterion introduced later by Jones et al [2]).
%
%    It was later discussed for the case of a finite space with independent
%    Gaussian priors first by Gupta and Miescke [3] and then by Frazier et al
%    [4] who named it "knowledge gradient".  It was extended to the case of
%    correlated priors by Frazier et al [5].
%
% NOTE ABOUT THE REFERENCE SET
%
%    For the case of continuous input spaces, there is no exact expression of
%    the KG criterion.  The approximate KG (AKG) criterion computed by this
%    function replaces the continuous 'min' in the expression of the criterion
%    at the i^th candidate point with a discrete min over some reference grid
%    *augmented* with the i^th candidate point.
%
%    This type of approximation was proposed by Scott et al [6] under the
%    name "knowledge gradient for continuous parameters" (KGCP).  In [6], the
%    reference grid is composed of the current set of evaluation points.  The
%    implementation provided in STK leaves this choice to the user (see the
%    sketch at the end of this note).
%
%    Note that, with the reference grid proposed in [6], the complexity of one
%    evaluation of the AKG (KGCP) criterion increases as O(N log N), where N
%    denotes the number of evaluation points.
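%
%    As a sketch (the variable names below are illustrative, not imposed by
%    STK), the KGCP choice of [6] can be reproduced by taking the current
%    evaluation points XI as reference grid and extracting the required
%    quantities from a joint prediction at [XI; XC], where XC denotes the
%    candidate points:
%
%       [zp, ~, ~, K] = stk_predict (model, xi, zi, [xi; xc]);
%       ir = 1:n;  ic = n + (1:nc);   % reference / candidate indices
%       zr_mean = zp.mean(ir);  zr_std = sqrt (zp.var(ir));
%       zc_mean = zp.mean(ic);  zc_std = sqrt (zp.var(ic));
%       zcr_cov = K(ic, ir);
%       AKG = stk_sampcrit_akg_eval (zc_mean, zc_std, zr_mean, zr_std, zcr_cov);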
%
% NOTE ABOUT THE NOISELESS CASE
%
%    Simplified formulas are available for the noiseless case (see [7]) but are
%    not currently implemented in STK.
%
% REFERENCES
%
%   [1] J. Mockus, V. Tiesis and A. Zilinskas. The application of Bayesian
%       methods for seeking the extremum. In L.C.W. Dixon and G.P. Szego, eds,
%       Towards Global Optimization, 2:117-129, North Holland NY, 1978.
%
%   [2] D. R. Jones, M. Schonlau and William J. Welch. Efficient global
%       optimization of expensive black-box functions.  Journal of Global
%       Optimization, 13(4):455-492, 1998.
%
%   [3] S. Gupta and K. Miescke,  Bayesian look ahead one-stage sampling
%       allocations for selection of the best population,  J. Statist. Plann.
%       Inference, 54:229-244, 1996.
%
%   [4] P. I. Frazier, W. B. Powell, S. Dayanik,  A knowledge gradient policy
%       for sequential information collection,  SIAM J. Control Optim.,
%       47(5):2410-2439, 2008.
%
%   [5] P. I. Frazier, W. B. Powell, and S. Dayanik.  The Knowledge-Gradient
%       Policy for Correlated Normal Beliefs.  INFORMS Journal on Computing
%       21(4):599-613, 2009.
%
%   [6] W. Scott, P. I. Frazier and W. B. Powell.  The correlated knowledge
%       gradient for simulation optimization of continuous parameters using
%       Gaussian process regression.  SIAM J. Optim, 21(3):996-1026, 2011.
%
%   [7] J. van der Herten, I. Couckuyt, D. Deschrijver, T. Dhaene,  Fast
%       Calculation of the Knowledge Gradient for Optimization of Deterministic
%       Engineering Simulations,  arXiv preprint arXiv:1608.04550
%
% See also: STK_SAMPCRIT_EI_EVAL

% Copyright Notice
%
%    Copyright (C) 2017, 2018, 2021 CentraleSupelec
%
%    Author:  Julien Bect  <julien.bect@centralesupelec.fr>

% Copying Permission Statement
%
%    This file is part of
%
%            STK: a Small (Matlab/Octave) Toolbox for Kriging
%               (https://github.com/stk-kriging/stk/)
%
%    STK is free software: you can redistribute it and/or modify it under
%    the terms of the GNU General Public License as published by the Free
%    Software Foundation,  either version 3  of the License, or  (at your
%    option) any later version.
%
%    STK is distributed  in the hope that it will  be useful, but WITHOUT
%    ANY WARRANTY;  without even the implied  warranty of MERCHANTABILITY
%    or FITNESS  FOR A  PARTICULAR PURPOSE.  See  the GNU  General Public
%    License for more details.
%
%    You should  have received a copy  of the GNU  General Public License
%    along with STK.  If not, see <http://www.gnu.org/licenses/>.

function AKG = stk_sampcrit_akg_eval (zc_mean, zc_std, zr_mean, zr_std, zcr_cov)

% zc_mean:   M x 1  where M is the number of (C)andidate points
% zc_std:    M x 1
% zr_mean:   L x 1  where L is the number of (R)eference points
% zr_std:    L x 1
% zcr_cov:   M x L  covariance between (C)andidate points and (R)eference points

% Note: Scott et al.'s "KGCP" corresponds to using the current evaluation
% points X_1, ..., X_n as reference points

M = size (zc_mean, 1);
if ~ isequal (size (zc_mean), [M 1])
    stk_error (['zc_mean should have size M x 1, where M is the number of ' ...
        'candidate points.'], 'IncorrectSize');
end
if ~ isequal (size (zc_std), [M 1])
    stk_error (['zc_std should have size M x 1, where M is the number of ' ...
        'candidate points.'], 'IncorrectSize');
end

L = size (zr_mean, 1);
if ~ isequal (size (zr_mean), [L 1])
    stk_error (['zr_mean should have size L x 1, where L is the number of ' ...
        'reference points.'], 'IncorrectSize');
end
if ~ isequal (size (zr_std), [L 1])
    stk_error (['zr_std should have size L x 1, where L is the number of ' ...
        'reference points.'], 'IncorrectSize');
end

if ~ isequal (size (zcr_cov), [M L])
    stk_error (['zcr_cov should have size M x L, where M is the number of ' ...
        'candidate points and L the number of reference points.'], ...
        'IncorrectSize');
end

AKG = zeros (M, 1);

% Minimum over the reference grid
if isempty (zr_mean)
    zr_min = +inf;
else
    zr_min = min (zr_mean);
end

for i = 1:M
    
    if zc_std(i) == 0,  continue;  end
    
    % Mitigate the effect of small inaccurate covariance values
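    % (the slopes a0 = cov / std are clipped to [-zr_std, zr_std], the range
    % allowed by the Cauchy-Schwarz inequality)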
    a0 = zcr_cov(i,:)' / zc_std(i);
    a0 = max (-zr_std, min (zr_std, a0));
    
    a = [a0; zc_std(i)];        % slopes
    b = [zr_mean; zc_mean(i)];  % intercepts
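    % (following Scott et al 2011, each line u -> a(j)*u + b(j) represents the
    %  posterior mean at the corresponding point after the candidate evaluation,
    %  parameterized by a standard Gaussian variable u; the criterion is based
    %  on the expected minimum of these lines)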
    
    % Intersection of lower half-planes
    % (algorithm similar to the one in Scott et al, 2011, Table 4.1,
    %  except that cases of equal slopes are dealt with inside the loop)
    [a, b, z] = stk_halfpintl (a, b);
    
    % Compute normal cdfs and pdfs
    F = [0; stk_distrib_normal_cdf(z); 1];
    p = [0; stk_distrib_normal_pdf(z); 0];
    
    % Compute the expected min  (Equation 4.11 in Scott et al, 2011)
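    % The minimum of the lines is piecewise affine; denoting by (z(j-1), z(j))
    % the interval on which the j^th line is active (first and last breakpoints
    % being -Inf and +Inf), its contribution to the expectation under a standard
    % Gaussian is b(j)*(F(j+1) - F(j)) - a(j)*(p(j+1) - p(j)).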
    expected_min = sum (b .* diff (F)) - sum (a .* diff (p));
    
    % Finally, compute the value of the AKG criterion
    AKG(i) = max (0, min (zr_min, zc_mean(i)) - expected_min);
    
end % for

end % function


%!shared zc_mean, zc_std, zr_mean, zr_std, zcr_cov, AKG, nc
%! xi = [0; 0.2; 0.7; 0.9];
%! zi = [1; 0.9; 0.6; 0.1] - 10;
%! ni = 4;
%!
%! M_prior = stk_model (@stk_materncov32_iso);
%! M_prior.param = log ([1.0; 2.1]);
%! M_prior.lognoisevariance = 0.678;
%!
%! nc = 20;
%! xc = stk_sampling_regulargrid (nc, 1, [0; 1]);
%! [zp, ~, ~, K] = stk_predict (M_prior, xi, zi, [xi; xc]);
%!
%! ir = 1:ni;  ic = ni + (1:nc);
%!
%! zc_mean = zp.mean(ic);
%! zc_std = sqrt (zp.var(ic));
%!
%! % reference grid: current evaluation points ("KGCP")
%! zr_mean = zp.mean(ir);
%! zr_std = sqrt (zp.var(ir));
%!
%! zcr_cov = K(ic, ir);

%!test AKG = stk_sampcrit_akg_eval (zc_mean, zc_std, zr_mean, zr_std, zcr_cov);
%!assert (isequal (size (AKG), [nc 1]))
%!assert (all (AKG >= 0))

% not enough or too many input args
%!error AKG = stk_sampcrit_akg_eval (zc_mean, zc_std, zr_mean, zr_std);
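% (an extra sixth argument should also trigger an error)
%!error AKG = stk_sampcrit_akg_eval (zc_mean, zc_std, zr_mean, zr_std, zcr_cov, 0.0);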