"""
Global minimization using the ``brute`` method (a.k.a. grid search)
===================================================================

"""
###############################################################################
# This notebook shows a simple example of using lmfit's ``brute`` method,
# which relies on the method with the same name from ``scipy.optimize``.
#
# The method computes the objective function's value at each point of a
# multidimensional grid in order to find the global minimum of the function.
# It behaves identically to ``scipy.optimize.brute`` when finite bounds are
# given on all varying parameters, but will also deal with non-bounded
# parameters (see below).
import copy

import matplotlib.pyplot as plt
import numpy as np

from lmfit import fit_report, Minimizer, Parameters

###############################################################################
# Let's start with the example given in the documentation of SciPy:
#
# "We illustrate the use of brute to seek the global minimum of a function of
# two variables that is given as the sum of a positive-definite quadratic and
# two deep “Gaussian-shaped” craters. Specifically, define the objective
# function f as the sum of three other functions, ``f = f1 + f2 + f3``. We
# suppose each of these has a signature ``(z, *params)``, where ``z = (x, y)``,
# and params and the functions are as defined below."
#
# First, we create a set of Parameters where all variables except ``x`` and
# ``y`` are given fixed values.
params = Parameters()
params.add_many(
        ('a', 2, False),
        ('b', 3, False),
        ('c', 7, False),
        ('d', 8, False),
        ('e', 9, False),
        ('f', 10, False),
        ('g', 44, False),
        ('h', -1, False),
        ('i', 2, False),
        ('j', 26, False),
        ('k', 1, False),
        ('l', -2, False),
        ('scale', 0.5, False),
        ('x', 0.0, True),
        ('y', 0.0, True))

###############################################################################
# Second, we create the three functions and the objective function:


def f1(p):
    par = p.valuesdict()
    return (par['a'] * par['x']**2 + par['b'] * par['x'] * par['y'] +
            par['c'] * par['y']**2 + par['d']*par['x'] + par['e']*par['y'] + par['f'])


def f2(p):
    par = p.valuesdict()
    return (-1.0*par['g']*np.exp(-((par['x']-par['h'])**2 +
            (par['y']-par['i'])**2) / par['scale']))


def f3(p):
    par = p.valuesdict()
    return (-1.0*par['j']*np.exp(-((par['x']-par['k'])**2 +
            (par['y']-par['l'])**2) / par['scale']))


def f(params):
    return f1(params) + f2(params) + f3(params)


###############################################################################
# Just as in the documentation, we will do a grid search between ``-4`` and
# ``4`` with a stepsize of ``0.25``. The bounds can be set as usual with the
# ``min`` and ``max`` attributes, and the stepsize is set using
# ``brute_step``.
params['x'].set(min=-4, max=4, brute_step=0.25)
params['y'].set(min=-4, max=4, brute_step=0.25)

###############################################################################
# Performing the actual grid search is done with:
fitter = Minimizer(f, params)
result = fitter.minimize(method='brute')

###############################################################################
# This will increment ``x`` and ``y`` from ``-4`` to ``4`` (not inclusive) in
# steps of ``0.25``.
grid_x, grid_y = [np.unique(par.ravel()) for par in result.brute_grid]
print(grid_x)
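
###############################################################################
# A small sanity check (a sketch, not part of the original example): with
# these settings the grid should be identical to ``np.arange(-4, 4, 0.25)``,
# i.e., the upper bound ``4`` itself is excluded.
print(np.allclose(grid_x, np.arange(-4, 4, 0.25)))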

###############################################################################
# The objective function is evaluated on this grid, and the raw output from
# ``scipy.optimize.brute`` is stored in the MinimizerResult as
# ``brute_<name>`` attributes, named after the outputs of SciPy's function
# (``x0``, ``fval``, ``grid`` and ``Jout``). These attributes are:
#
# ``result.brute_x0`` -- A 1-D array containing the coordinates of a point at
# which the objective function had its minimum value.
print(result.brute_x0)

###############################################################################
# ``result.brute_fval`` -- Function value at the point x0.
print(result.brute_fval)
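
###############################################################################
# As a quick consistency check (a sketch, not part of the original example):
# since the best grid-point values are also stored in ``result.params``,
# evaluating the objective there should reproduce ``brute_fval``.
print(f(result.params))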

###############################################################################
# ``result.brute_grid`` -- Representation of the evaluation grid. It has the
# same length as x0.
print(result.brute_grid)

###############################################################################
# ``result.brute_Jout`` -- Function values at each point of the evaluation
# grid, i.e., Jout = func(\*grid).
print(result.brute_Jout)

###############################################################################
# **Reassuringly, the obtained results are identical to using the method in
# SciPy directly!**
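
###############################################################################
# As a cross-check, we can call ``scipy.optimize.brute`` directly, writing the
# objective as a plain function of ``z = (x, y)`` as in the SciPy
# documentation. This is a minimal sketch (the helper ``f_direct`` and the
# tuple ``pvals`` are ours, not part of lmfit); with ``finish=None`` the
# returned minimum should match ``result.brute_x0`` and ``result.brute_fval``.
from scipy import optimize


def f_direct(z, *p):
    # combine the quadratic and the two 'craters', mirroring f1 + f2 + f3
    x, y = z
    a, b, c, d, e, ff, g, h, i, j, k, l, scale = p
    quad = a*x**2 + b*x*y + c*y**2 + d*x + e*y + ff
    crater1 = -g*np.exp(-((x - h)**2 + (y - i)**2) / scale)
    crater2 = -j*np.exp(-((x - k)**2 + (y - l)**2) / scale)
    return quad + crater1 + crater2


pvals = (2, 3, 7, 8, 9, 10, 44, -1, 2, 26, 1, -2, 0.5)
rranges = (slice(-4, 4, 0.25), slice(-4, 4, 0.25))
x0, fval, _, _ = optimize.brute(f_direct, rranges, args=pvals,
                                full_output=True, finish=None)
print(x0, fval)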

###############################################################################
# Example 2: fit of a decaying sine wave
#
# In this example, we will explain some of the options of the algorithm.
#
# We start off by generating some synthetic data with noise for a decaying
# sine wave, define an objective function and create a Parameter set.
x = np.linspace(0, 15, 301)
np.random.seed(7)
noise = np.random.normal(size=x.size, scale=0.2)
data = (5. * np.sin(2*x - 0.1) * np.exp(-x*x*0.025) + noise)
plt.plot(x, data, 'b')


def fcn2min(params, x, data):
    """Model decaying sine wave, subtract data."""
    amp = params['amp']
    shift = params['shift']
    omega = params['omega']
    decay = params['decay']
    model = amp * np.sin(x*omega + shift) * np.exp(-x*x*decay)
    return model - data


# create a set of Parameters
params = Parameters()
params.add('amp', value=7, min=2.5)
params.add('decay', value=0.05)
params.add('shift', value=0.0, min=-np.pi/2., max=np.pi/2)
params.add('omega', value=3, max=5)

###############################################################################
# In contrast to the implementation in SciPy (as shown in the first example),
# varying parameters do not need to have finite bounds in lmfit. However, in
# that case they **do** need the ``brute_step`` attribute specified, so let's
# do that:
params['amp'].set(brute_step=0.25)
params['decay'].set(brute_step=0.005)
params['omega'].set(brute_step=0.25)

###############################################################################
# Our initial parameter set is now defined as shown below, and this will
# determine how the grid is set up.
params.pretty_print()

###############################################################################
# First, we initialize a Minimizer and perform the grid search:
fitter = Minimizer(fcn2min, params, fcn_args=(x, data))
result_brute = fitter.minimize(method='brute', Ns=25, keep=25)

###############################################################################
# We used two new parameters here: ``Ns`` and ``keep``. The parameter ``Ns``
# determines the 'number of grid points along the axes', similarly to its
# usage in SciPy. Together with ``brute_step``, ``min`` and ``max`` for a
# Parameter it will dictate how the grid is set up:
#
# **(1)** finite bounds are specified ("SciPy implementation"): uses
# ``brute_step`` if present (as in the example above) or ``Ns`` to generate
# the grid. The latter scenario, which interpolates ``Ns`` points from ``min``
# to ``max`` (inclusive), is shown here for the parameter ``shift``:
par_name = 'shift'
indx_shift = result_brute.var_names.index(par_name)
grid_shift = np.unique(result_brute.brute_grid[indx_shift].ravel())
print("parameter = {}\nnumber of steps = {}\ngrid = {}".format(par_name,
      len(grid_shift), grid_shift))
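
###############################################################################
# A quick check (a sketch, not part of the original example): for this
# scenario the grid should equal ``np.linspace(min, max, Ns)``.
print(np.allclose(grid_shift, np.linspace(-np.pi/2, np.pi/2, 25)))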

###############################################################################
# If finite bounds are not set for a certain parameter then the user **must**
# specify ``brute_step``. Three more scenarios are considered here:
#
# **(2)** lower bound (min) and brute_step are specified:
# range = (min, min + Ns * brute_step, brute_step)
par_name = 'amp'
indx_shift = result_brute.var_names.index(par_name)
grid_shift = np.unique(result_brute.brute_grid[indx_shift].ravel())
print("parameter = {}\nnumber of steps = {}\ngrid = {}".format(par_name, len(grid_shift), grid_shift))

###############################################################################
# **(3)** upper bound (max) and brute_step are specified:
# range = (max - Ns * brute_step, max, brute_step)
par_name = 'omega'
indx_shift = result_brute.var_names.index(par_name)
grid_shift = np.unique(result_brute.brute_grid[indx_shift].ravel())
print("parameter = {}\nnumber of steps = {}\ngrid = {}".format(par_name, len(grid_shift), grid_shift))

###############################################################################
# **(4)** numerical value (value) and brute_step are specified:
# range = (value - (Ns//2) * brute_step, value + (Ns//2) * brute_step, brute_step)
par_name = 'decay'
indx_shift = result_brute.var_names.index(par_name)
grid_shift = np.unique(result_brute.brute_grid[indx_shift].ravel())
print("parameter = {}\nnumber of steps = {}\ngrid = {}".format(par_name, len(grid_shift), grid_shift))

###############################################################################
# The ``MinimizerResult`` contains all the usual best-fit parameters and
# fitting statistics. For example, the optimal solution from the grid search
# is given below together with a plot:
print(fit_report(result_brute))
plt.plot(x, data, 'b')
plt.plot(x, data + fcn2min(result_brute.params, x, data), 'r--')

###############################################################################
# We can see that this fit is already very good, which is what we should expect
# since our ``brute`` force grid is sampled rather finely and encompasses the
# "correct" values.
#
# In a more realistic, complicated example the ``brute`` method will be used
# to get reasonable starting values for the parameters, after which another
# minimization (e.g., using ``leastsq``) is performed. That is where the
# ``keep`` parameter comes into play: it determines the "number of best
# candidates from the brute force method that are stored in the ``candidates``
# attribute". In the example above we store the best-ranking 25 solutions (the
# default value is ``50`` and storing all the grid points can be accomplished
# by choosing ``all``). The ``candidates`` attribute contains the parameters
# and ``chisqr`` from the brute force method as a namedtuple,
# ``('Candidate', ['params', 'score'])``, sorted on the (lowest) ``chisqr``
# value. To access the values for a particular candidate one can use
# ``result.candidates[#].params`` or ``result.candidates[#].score``, where a
# lower # represents a better candidate. The ``show_candidates(#)`` method
# uses ``pretty_print()`` to show a specific candidate-# or all candidates
# when no number is specified.
#
# The optimal fit is, as usual, stored in the ``MinimizerResult.params``
# attribute and is, therefore, identical to the top-ranked candidate shown by
# ``result_brute.show_candidates(1)``:
result_brute.show_candidates(1)
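
###############################################################################
# Equivalently (a sketch of direct attribute access), the ``chisqr``
# (``score``) of the best candidate is:
print(result_brute.candidates[0].score)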

###############################################################################
# In this case, the next-best scoring candidate already has a ``chisqr`` that
# is quite a bit higher:
result_brute.show_candidates(2)

###############################################################################
# and is, therefore, probably less likely. However, as stated above, in most
# cases you'll want to do another minimization using the solutions from the
# ``brute`` method as starting values. That can be easily accomplished as
# shown in the code below, where we now perform a ``leastsq`` minimization
# starting from each of the top-25 solutions and accept a trial solution if
# its ``chisqr`` is lower than that of the previously 'optimal' solution:
best_result = copy.deepcopy(result_brute)

for candidate in result_brute.candidates:
    trial = fitter.minimize(method='leastsq', params=candidate.params)
    if trial.chisqr < best_result.chisqr:
        best_result = trial

###############################################################################
# From the ``leastsq`` minimization we obtain the following parameters for the
# best result:
print(fit_report(best_result))

###############################################################################
# As expected, the parameters have not changed significantly, as they were
# already very close to the "real" values; this can also be appreciated from
# the plots below.
plt.plot(x, data, 'b')
plt.plot(x, data + fcn2min(result_brute.params, x, data), 'r--',
         label='brute')
plt.plot(x, data + fcn2min(best_result.params, x, data), 'g--',
         label='brute followed by leastsq')
plt.legend()


###############################################################################
# Finally, the results from the ``brute`` force grid search can be visualized
# using the rather lengthy Python function below (which might get incorporated
# into lmfit at some point).
def plot_results_brute(result, best_vals=True, varlabels=None,
                       output=None):
    """Visualize the result of the brute force grid search.

    The output file will display the chi-square value per parameter and
    contour plots for all combinations of two parameters.

    Inspired by the `corner` package (https://github.com/dfm/corner.py).

    Parameters
    ----------
    result : :class:`~lmfit.minimizer.MinimizerResult`
        Contains the results from the :meth:`brute` method.

    best_vals : bool, optional
        Whether to show the best values from the grid search (default is True).

    varlabels : list, optional
        If None (default), use `result.var_names` as axis labels, otherwise
        use the names specified in `varlabels`.

    output : str, optional
        Name of the output PDF file (default is None).
    """
    from matplotlib.colors import LogNorm

    npars = len(result.var_names)
    fig, axes = plt.subplots(npars, npars)

    if not varlabels:
        varlabels = result.var_names
    if best_vals and isinstance(best_vals, bool):
        best_vals = result.params

    for i, par1 in enumerate(result.var_names):
        for j, par2 in enumerate(result.var_names):

            # parameter vs chi2 in case of only one parameter
            if npars == 1:
                axes.plot(result.brute_grid, result.brute_Jout, 'o', ms=3)
                axes.set_ylabel(r'$\chi^{2}$')
                axes.set_xlabel(varlabels[i])
                if best_vals:
                    axes.axvline(best_vals[par1].value, ls='dashed', color='r')

            # parameter vs chi2 profile on top
            elif i == j and j < npars-1:
                if i == 0:
                    axes[0, 0].axis('off')
                ax = axes[i, j+1]
                red_axis = tuple([a for a in range(npars) if a != i])
                ax.plot(np.unique(result.brute_grid[i]),
                        np.minimum.reduce(result.brute_Jout, axis=red_axis),
                        'o', ms=3)
                ax.set_ylabel(r'$\chi^{2}$')
                ax.yaxis.set_label_position("right")
                ax.yaxis.set_ticks_position('right')
                ax.set_xticks([])
                if best_vals:
                    ax.axvline(best_vals[par1].value, ls='dashed', color='r')

            # parameter vs chi2 profile on the left
            elif j == 0 and i > 0:
                ax = axes[i, j]
                red_axis = tuple([a for a in range(npars) if a != i])
                ax.plot(np.minimum.reduce(result.brute_Jout, axis=red_axis),
                        np.unique(result.brute_grid[i]), 'o', ms=3)
                ax.invert_xaxis()
                ax.set_ylabel(varlabels[i])
                if i != npars-1:
                    ax.set_xticks([])
                elif i == npars-1:
                    ax.set_xlabel(r'$\chi^{2}$')
                if best_vals:
                    ax.axhline(best_vals[par1].value, ls='dashed', color='r')

            # contour plots for all combinations of two parameters
            elif j > i:
                ax = axes[j, i+1]
                red_axis = tuple([a for a in range(npars) if a != i and a != j])
                X, Y = np.meshgrid(np.unique(result.brute_grid[i]),
                                   np.unique(result.brute_grid[j]))
                lvls1 = np.linspace(result.brute_Jout.min(),
                                    np.median(result.brute_Jout)/2.0, 7, dtype='int')
                lvls2 = np.linspace(np.median(result.brute_Jout)/2.0,
                                    np.median(result.brute_Jout), 3, dtype='int')
                lvls = np.unique(np.concatenate((lvls1, lvls2)))
                ax.contourf(X.T, Y.T, np.minimum.reduce(result.brute_Jout, axis=red_axis),
                            lvls, norm=LogNorm())
                ax.set_yticks([])
                if best_vals:
                    ax.axvline(best_vals[par1].value, ls='dashed', color='r')
                    ax.axhline(best_vals[par2].value, ls='dashed', color='r')
                    ax.plot(best_vals[par1].value, best_vals[par2].value, 'rs', ms=3)
                if j != npars-1:
                    ax.set_xticks([])
                elif j == npars-1:
                    ax.set_xlabel(varlabels[i])
                if j - i >= 2:
                    axes[i, j].axis('off')

    if output is not None:
        plt.savefig(output)


###############################################################################
# and finally, to generate the figure:
plot_results_brute(result_brute, best_vals=True, varlabels=None)