Diffstat (limited to 'PyMca5/PyMcaMath/mva/PCATools.py')
-rw-r--r--  PyMca5/PyMcaMath/mva/PCATools.py | 126
1 file changed, 62 insertions(+), 64 deletions(-)
diff --git a/PyMca5/PyMcaMath/mva/PCATools.py b/PyMca5/PyMcaMath/mva/PCATools.py
index 85261ef..25a7e78 100644
--- a/PyMca5/PyMcaMath/mva/PCATools.py
+++ b/PyMca5/PyMcaMath/mva/PCATools.py
@@ -2,7 +2,7 @@
#
# The PyMca X-Ray Fluorescence Toolkit
#
-# Copyright (c) 2004-2016 European Synchrotron Radiation Facility
+# Copyright (c) 2004-2019 European Synchrotron Radiation Facility
#
# This file is part of the PyMca X-ray Fluorescence Toolkit developed at
# the ESRF by the Software group.
@@ -85,17 +85,17 @@ def getCovarianceMatrix(stack,
:type spatial_mask: Numpy array of unsigned bytes (numpy.uint8) or None (default).
:returns: The covMatrix, the average spectrum and the number of used pixels.
"""
- #the 1D mask = weights should correspond to the values, before or after
- #sampling? it could be handled as weigths to be applied to the
- #spectra. That would allow two uses, as mask and as weights, at
- #the cost of a multiplication.
+ # should the 1D mask (= weights) correspond to the values before or
+ # after sampling? It could be handled as weights to be applied to the
+ # spectra. That would allow two uses, as mask and as weights, at
+ # the cost of a multiplication.
- #the spatial_mask accounts for pixels to be considered. It allows
- #to calculate the covariance matrix of a subset or to deal with
- #non finite data (NaN, +inf, -inf, ...). The calling program
- #should set the mask.
+ # the spatial_mask accounts for the pixels to be considered. It makes
+ # it possible to calculate the covariance matrix of a subset or to
+ # deal with non-finite data (NaN, +inf, -inf, ...). The calling
+ # program should set the mask.
- #recover the actual data to work with
+ # recover the actual data to work with
if hasattr(stack, "info") and hasattr(stack, "data"):
#we are dealing with a PyMca data object
data = stack.data
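The docstring above is the public contract: getCovarianceMatrix takes the stack plus optional spatial_mask and weights arrays and returns the covariance matrix, the average spectrum and the number of pixels actually used. A minimal usage sketch, assuming a plain (rows, cols, channels) numpy array as the stack; the keyword names are those visible in this diff, while the exact mask shape handling is an assumption:

    import numpy
    from PyMca5.PyMcaMath.mva.PCATools import getCovarianceMatrix

    stack = numpy.random.random((10, 10, 50))       # hypothetical data
    mask = numpy.ones((10, 10), dtype=numpy.uint8)  # keep every pixel ...
    mask[0, 0] = 0                                  # ... except this one

    covMatrix, averageSpectrum, usedPixels = getCovarianceMatrix(
        stack, index=-1, spatial_mask=mask)
    print(covMatrix.shape, usedPixels)              # expect (50, 50) and 99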
@@ -117,18 +117,18 @@ def getCovarianceMatrix(stack,
else:
actualIndex = index
- #the number of spatial pixels
+ # the number of spatial pixels
nPixels = 1
for i in range(len(oldShape)):
if i != actualIndex:
nPixels *= oldShape[i]
- #remove inf or nan
+ # remove inf or nan
#image_data = data.sum(axis=actualIndex)
#spatial_mask = numpy.isfinite(image_data)
#
- #the starting number of channels or of images
+ # the starting number of channels or of images
N = oldShape[actualIndex]
# our binning (better said sampling) is spectral, in order not to
@@ -156,7 +156,7 @@ def getCovarianceMatrix(stack,
else:
cleanWeights = weights[::binning]
- #end of checking part
+ # end of checking part
eigenvectorLength = nChannels
if (not force)and isinstance(data, numpy.ndarray):
@@ -198,7 +198,7 @@ def getCovarianceMatrix(stack,
averageMatrix = None
return covMatrix, sumSpectrum / usedPixels, usedPixels
- #we are dealing with dynamically loaded data
+ # we are dealing with dynamically loaded data
_logger.debug("DYNAMICALLY LOADED DATA")
#create the needed storage space for the covariance matrix
try:
@@ -213,15 +213,15 @@ def getCovarianceMatrix(stack,
data = None
raise
- #workaround a problem with h5py
+ # workaround a problem with h5py
try:
if actualIndex in [0]:
testException = data[0:1]
else:
if len(data.shape) == 2:
- testException = data[0:1,-1]
+ testException = data[0:1, -1]
elif len(data.shape) == 3:
- testException = data[0:1,0:1,-1]
+ testException = data[0:1, 0:1, -1]
except AttributeError:
txt = "%s" % type(data)
if 'h5py' in txt:
@@ -232,13 +232,13 @@ def getCovarianceMatrix(stack,
raise
if actualIndex in [0]:
- #divider is used to decide the fraction of images to keep in memory
- #in order to limit file access on dynamically loaded data.
- #Since two chunks of the same size are used, the amount of memory
- #needed is twice the data size divided by the divider.
- #For instance, divider = 10 implies the data to be read 5.5 times
- #from disk while having a memory footprint of about one fifth of
- #the dataset size.
+ # divider is used to decide the fraction of images to keep in memory
+ # in order to limit file access on dynamically loaded data.
+ # Since two chunks of the same size are used, the amount of memory
+ # needed is twice the data size divided by the divider.
+ # For instance, divider = 10 implies reading the data 5.5 times
+ # from disk while having a memory footprint of about one fifth of
+ # the dataset size.
step = 0
divider = 10
while step < 1:
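The 5.5 figure quoted above follows from the triangular loop this hunk introduces: with divider = 10, the outer loop reads each of the 10 chunks once and, assuming the diagonal blocks reuse chunk1, the inner loop re-reads only the 45 blocks strictly below the diagonal, i.e. 55 chunk reads of one tenth of the data each, while only two chunks are resident at any time:

    # Back-of-the-envelope check of the comment above.
    divider = 10
    outer_reads = divider                         # one read per block row
    inner_reads = divider * (divider - 1) // 2    # blocks below the diagonal
    print((outer_reads + inner_reads) / divider)  # -> 5.5 dataset reads
    print(2.0 / divider)                          # -> 0.2 of the dataset in memory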
@@ -284,12 +284,12 @@ def getCovarianceMatrix(stack,
#get step images for the second chunk
jToRead = min(step, nChannels - j)
- #with loop:
+ # with loop:
#for k in range(0, jToRead):
# chunk2[:,k] = data[(j+k):(j+k+1)].reshape(1,-1)
# if spatial_mask is not None:
# chunk2[badMask[(j+k):(j+k+1),k]] = 0
- #equivalent without loop:
+ # equivalent without loop:
chunk2[:, 0:jToRead] =\
data[j:(j + jToRead)].reshape(jToRead, -1).T
if spatial_mask is not None:
@@ -303,7 +303,7 @@ def getCovarianceMatrix(stack,
if spatial_mask is not None:
chunk2[badMask, 0:jToRead] = 0
- #dot product
+ # dot product
if (iToRead != step) or (jToRead != step):
covMatrix[i: (i + iToRead), j: (j + jToRead)] =\
dotblas.dot(chunk1[:iToRead, :nPixels],
@@ -316,7 +316,7 @@ def getCovarianceMatrix(stack,
covMatrix[j: (j + jToRead), i: (i + iToRead)] =\
covMatrix[i: (i + iToRead), j: (j + jToRead)].T
- #increment j
+ # increment j
j += jToRead
i += iToRead
chunk1 = None
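The loop above computes each off-diagonal block of the symmetric covariance matrix once and mirrors it with a transposed assignment instead of recomputing it. A self-contained sketch of the same pattern, with illustrative names and in-memory data laid out as (nChannels, nPixels):

    import numpy

    def chunked_cov(data, step):
        # Accumulate data @ data.T block by block, mirroring the
        # sub-diagonal blocks as the loop in the diff does.
        n = data.shape[0]
        cov = numpy.zeros((n, n), dtype=numpy.float64)
        for i in range(0, n, step):
            chunk1 = data[i:i + step]
            for j in range(0, i + step, step):
                chunk2 = chunk1 if j == i else data[j:j + step]
                block = numpy.dot(chunk1, chunk2.T)
                cov[i:i + chunk1.shape[0], j:j + chunk2.shape[0]] = block
                if j < i:
                    cov[j:j + chunk2.shape[0], i:i + chunk1.shape[0]] = block.T
        return cov

    data = numpy.random.random((50, 200))
    assert numpy.allclose(chunked_cov(data, 20), numpy.dot(data, data.T))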
@@ -331,7 +331,7 @@ def getCovarianceMatrix(stack,
spectrumIndex = 0
nImagesRead = 0
while i < N:
- #fill chunk1
+ # fill chunk1
jj = 0
for iToRead in range(0, int(min(step * binning, N - i)),
binning):
@@ -349,12 +349,12 @@ def getCovarianceMatrix(stack,
iToRead = jj
j = 0
while j <= i:
- #get step images for the second chunk
+ # get step images for the second chunk
if j == i:
jToRead = iToRead
chunk2[:, 0:jToRead] = chunk1[0:jToRead, :].T
else:
- #get step images for the second chunk
+ # get step images for the second chunk
jj = 0
for jToRead in range(0,
int(min(step * binning, N - j)),
@@ -369,7 +369,7 @@ def getCovarianceMatrix(stack,
average.shape = 1, jj
chunk2 -= average
jToRead = jj
- #dot product
+ # dot product
if (iToRead != step) or (jToRead != step):
covMatrix[i:(i + iToRead), j:(j + jToRead)] =\
dotblas.dot(chunk1[:iToRead, :nPixels],
@@ -382,7 +382,7 @@ def getCovarianceMatrix(stack,
covMatrix[j:(j + jToRead), i:(i + iToRead)] =\
covMatrix[i:(i + iToRead), j:(j + jToRead)].T
- #increment j
+ # increment j
j += jToRead * step
i += iToRead * step
chunk1 = None
@@ -390,17 +390,17 @@ def getCovarianceMatrix(stack,
else:
raise ValueError("PCATools.getCovarianceMatrix: Unhandled case")
- #should one divide by N or by N-1 ?? if we use images, we
- #assume the observables are the images, not the spectra!!!
- #so, covMatrix /= nChannels is wrong and one has to use:
+ # should one divide by N or by N-1 ?? if we use images, we
+ # assume the observables are the images, not the spectra!!!
+ # so, covMatrix /= nChannels is wrong and one has to use:
covMatrix /= usedPixels
else:
- #the data are already arranged as (nPixels, nChannels) and we
- #basically have to return data.T * data to get back the covariance
- #matrix as (nChannels, nChannels)
- #if someone had the bad idea to store the data in HDF5 with a chunk
- #size based on the pixels and not on the spectra a loop based on
- #reading spectrum per spectrum can be very slow
+ # the data are already arranged as (nPixels, nChannels) and we
+ # basically have to return data.T * data to get back the covariance
+ # matrix as (nChannels, nChannels)
+ # if someone had the bad idea to store the data in HDF5 with a chunk
+ # size based on the pixels and not on the spectra a loop based on
+ # reading spectrum per spectrum can be very slow
step = 0
divider = 10
while step < 1:
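In this branch the comment says it all: with data already shaped (nPixels, nChannels), the covariance accumulation is just data.T @ data, and the chunked loop that follows exists only to bound memory use and file access. A sketch of the equivalence with hypothetical sizes:

    import numpy

    data = numpy.random.random((1000, 50))   # hypothetical (nPixels, nChannels)
    direct = numpy.dot(data.T, data)

    step = 100                               # rows read per iteration
    chunked = numpy.zeros((50, 50))
    for k in range(0, data.shape[0], step):
        a = data[k:k + step]
        chunked += numpy.dot(a.T, a)         # same accumulation as below
    assert numpy.allclose(direct, chunked)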
@@ -450,7 +450,7 @@ def getCovarianceMatrix(stack,
k += kToRead
tmpData = None
elif oldShape[1] == 1:
- #almost identical to the previous case
+ # almost identical to the previous case
tmpData = numpy.zeros((step, nChannels), numpy.float64)
if cleanMask is not None:
badMask.shape = data.shape[0], data.shape[1]
@@ -489,16 +489,16 @@ def getCovarianceMatrix(stack,
k += kToRead
tmpData = None
else:
- #I should choose the sizes in terms of the size
- #of the dataset
+ # I should choose the sizes in terms of the size
+ # of the dataset
if oldShape[0] < 41:
- #divide by 10
+ # divide by 10
deltaRow = 4
elif oldShape[0] < 101:
- #divide by 10
+ # divide by 10
deltaRow = 10
else:
- #take pieces of one tenth
+ # take pieces of one tenth
deltaRow = int(oldShape[0] / 10)
deltaCol = oldShape[1]
tmpData = numpy.zeros((deltaRow, deltaCol, nChannels),
@@ -522,10 +522,10 @@ def getCovarianceMatrix(stack,
covMatrix += dotblas.dot(a.T, a)
a = None
i += iToRead
- #should one divide by N or by N-1 ??
+ # should one divide by N or by N-1 ??
covMatrix /= usedPixels - 1
if center:
- #the n-1 appears again here
+ # the n-1 appears again here
averageMatrix = numpy.outer(sumSpectrum, sumSpectrum)\
/ (usedPixels * (usedPixels - 1))
covMatrix -= averageMatrix
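The two statements above implement the textbook identity: for an (n, p) data matrix X with column sums s, the sample covariance equals X.T @ X / (n - 1) - outer(s, s) / (n * (n - 1)). A numerical check with hypothetical data:

    import numpy

    X = numpy.random.random((200, 5))
    n = X.shape[0]
    s = X.sum(axis=0)
    cov = numpy.dot(X.T, X) / (n - 1) - numpy.outer(s, s) / (n * (n - 1))
    assert numpy.allclose(cov, numpy.cov(X, rowvar=False))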
@@ -539,7 +539,7 @@ def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
_logger.debug("index = %d", index)
_logger.debug("center = %s", center)
_logger.debug("scale = %s", scale)
- #recover the actual data to work with
+ # recover the actual data to work with
if hasattr(stack, "info") and hasattr(stack, "data"):
#we are dealing with a PyMca data object
data = stack.data
@@ -558,7 +558,7 @@ def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
else:
actualIndex = index
- #workaround a problem with h5py
+ # workaround a problem with h5py
try:
if actualIndex in [0]:
testException = data[0:1]
@@ -576,13 +576,13 @@ def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
else:
raise
- #the number of spatial pixels
+ # the number of spatial pixels
nPixels = 1
for i in range(len(oldShape)):
if i != actualIndex:
nPixels *= oldShape[i]
- #the number of channels
+ # the number of channels
nChannels = oldShape[actualIndex]
if binning is None:
binning = 1
@@ -602,7 +602,7 @@ def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
spatial_mask=mask,
weights=spectral_mask)
- #the total variance is the sum of the elements of the diagonal
+ # the total variance is the sum of the elements of the diagonal
totalVariance = numpy.diag(cov)
standardDeviation = numpy.sqrt(totalVariance)
standardDeviation = standardDeviation + (standardDeviation == 0)
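Adding the boolean array (standardDeviation == 0) promotes it to float and turns every zero into 1.0, so channels with zero variance cannot produce NaN or inf in the later scaling:

    import numpy

    std = numpy.array([0.5, 0.0, 2.0])
    safe = std + (std == 0)   # -> array([0.5, 1.0, 2.0])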
@@ -635,7 +635,7 @@ def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
images = numpy.zeros((ncomponents, nPixels), dtype)
eigenvectors = numpy.zeros((ncomponents, N), dtype)
eigenvalues = numpy.zeros((ncomponents,), dtype)
- #sort eigenvalues
+ # sort eigenvalues
if 1:
a = [(evalues[i], i) for i in range(len(evalues))]
a.sort()
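The (value, index) pair sort above is a pre-numpy idiom; together with the reversed selection applied a few lines below it is equivalent to a descending argsort. A compact sketch:

    import numpy

    evalues = numpy.array([0.1, 3.0, 0.7])   # hypothetical eigenvalues
    ncomponents = 2
    idx = numpy.argsort(evalues)[::-1][:ncomponents]   # largest first -> [1, 2]
    # then, as in the diff: eigenvalues[:] = evalues[idx]
    #                       eigenvectors[:, :] = evectors[:, idx].T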
@@ -658,7 +658,7 @@ def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
eigenvalues[:] = evalues[idx]
eigenvectors[:, :] = evectors[:, idx].T
- #calculate the projections
+ # calculate the projections
# Subtracting the average and normalizing to standard deviation gives worse results.
# Versions 5.0.0 to 5.1.0 implemented that behavior as default.
# When dealing with the CH1777 test dataset the Sb signal was less contrasted against
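The projections computed in the remaining hunks are plain dot products of each (optionally centered, binned) spectrum with the eigenvectors; the per-pixel loops exist because the data may be dynamically loaded. For an in-memory array the same result is one matrix product (names hypothetical):

    import numpy

    data = numpy.random.random((100, 50))        # hypothetical (nPixels, nChannels)
    eigenvectors = numpy.random.random((3, 50))  # hypothetical (ncomponents, N)
    images = numpy.dot(data, eigenvectors.T).T   # -> (ncomponents, nPixels)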
@@ -675,13 +675,12 @@ def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
for j in range(ncomponents):
images[j:j + 1, :] += tmpData * eigenvectors[j, i]
if len(oldShape) == 3:
- #reshape the images
+ # reshape the images
images.shape = ncomponents, oldShape[1], oldShape[2]
else:
- #array of spectra
+ # array of spectra
if len(oldShape) == 2:
for i in range(nPixels):
- #print i
tmpData = data[i, :]
tmpData.shape = 1, nChannels
if subtractAndNormalize:
@@ -690,13 +689,12 @@ def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
tmpData = tmpData[:, ::binning]
for j in range(ncomponents):
images[j, i] = numpy.dot(tmpData, eigenvectors[j])
- #reshape the images
+ # reshape the images
images.shape = ncomponents, nPixels
elif len(oldShape) == 3:
i = 0
for r in range(oldShape[0]):
for c in range(oldShape[1]):
- #print i
tmpData = data[r, c, :]
tmpData.shape = 1, nChannels
if subtractAndNormalize:
@@ -706,7 +704,7 @@ def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
for j in range(ncomponents):
images[j, i] = numpy.dot(tmpData, eigenvectors[j])
i += 1
- #reshape the images
+ # reshape the images
images.shape = ncomponents, oldShape[0], oldShape[1]
if legacy:
return images, eigenvalues, eigenvectors