# Source code for pysptools.skl.km

#
#------------------------------------------------------------------------------
# Copyright (c) 2013-2014, Christian Therien
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#------------------------------------------------------------------------------
#
# km.py - This file is part of the PySptools package.
#

"""
KMeans class
"""

import numpy as np
import sklearn.cluster as cluster
#from . import out
#from .inval import *
from pysptools.classification.out import Output
from pysptools.classification.inval import *


class KMeans(object):
    """
    KMeans clustering algorithm adapted to hyperspectral imaging.

    Thin wrapper around `sklearn.cluster.KMeans` that accepts a HSI cube,
    keeps the last computed cluster map and delegates plotting to an
    `Output` helper.
    """

    def __init__(self):
        # Last cluster map computed by predict(); None until predict() runs.
        self.cluster = None
        # Number of clusters requested at the last predict() call.
        self.n_clusters = None
        # Helper object used by plot() and display() to render the map.
        self.output = Output('KMeans')

    @PredictInputValidation('KMeans')
    def predict(self, M, n_clusters=5, n_jobs=1, init='k-means++'):
        """
        KMeans clustering algorithm adapted to hyperspectral imaging.
        It is a simple wrapper to the scikit-learn version.

        Parameters:
            M: `numpy array`
              A HSI cube (m x n x p).

            n_clusters: `int [default 5]`
                The number of clusters to generate.

            n_jobs: `int [default 1]`
                Taken from scikit-learn doc:
                The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel.
                If -1 all CPUs are used. If 1 is given, no parallel computing code is used at all, which is useful for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used.
                This argument is ignored when the installed scikit-learn
                KMeans no longer accepts it (it was removed in
                scikit-learn 1.0).

            init: `string or array [default 'k-means++']`
                Taken from scikit-learn doc: Method for initialization, defaults to `k-means++`:
                `k-means++` : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details.
                `random`: choose k observations (rows) at random from data for the initial centroids.
                If an ndarray is passed, it should be of shape (n_clusters, n_features) and gives the initial centers.


        Returns: `numpy array`
              A cluster map (m x n); each element is the index of the
              cluster assigned to the corresponding pixel.

        """
        h, w, numBands = M.shape
        self.n_clusters = n_clusters
        # Flatten the cube to one row per pixel, one column per band.
        X = np.reshape(M, (w*h, numBands))
        try:
            clf = cluster.KMeans(n_clusters=n_clusters, n_jobs=n_jobs, init=init)
        except TypeError:
            # Recent scikit-learn releases removed the n_jobs keyword from
            # KMeans; the constructor then rejects it with a TypeError.
            # Fall back to the signature without n_jobs so the wrapper keeps
            # working on both old and new releases.
            clf = cluster.KMeans(n_clusters=n_clusters, init=init)
        labels = clf.fit_predict(X)
        # Restore the spatial layout of the image.
        self.cluster = np.reshape(labels, (h, w))
        return self.cluster

    @PlotInputValidation3('KMeans')
    def plot(self, path, interpolation='none', colorMap='Accent', suffix=None):
        """
        Plot the cluster map.

        Parameters:
            path: `string`
              The path where to put the plot.

            interpolation: `string [default none]`
              A matplotlib interpolation method.

            colorMap: `string [default 'Accent']`
              A color map element of
              ['Accent', 'Dark2', 'Paired', 'Pastel1', 'Pastel2', 'Set1', 'Set2', 'Set3'],
              "Accent" is the default and it falls back on "Jet".

            suffix: `string [default None]`
              Add a suffix to the file name.
        """
        self.output.plot(self.cluster, self.n_clusters, path=path, interpolation=interpolation, colorMap=colorMap, suffix=suffix)

    @DisplayInputValidation3('KMeans')
    def display(self, interpolation='none', colorMap='Accent', suffix=None):
        """
        Display the cluster map.

        Parameters:
            interpolation: `string [default none]`
              A matplotlib interpolation method.

            colorMap: `string [default 'Accent']`
              A color map element of
              ['Accent', 'Dark2', 'Paired', 'Pastel1', 'Pastel2', 'Set1', 'Set2', 'Set3'],
              "Accent" is the default and it falls back on "Jet".

            suffix: `string [default None]`
              Add a suffix to the title.
        """
        # Same call as plot() but without a path.
        self.output.plot(self.cluster, self.n_clusters, interpolation=interpolation, colorMap=colorMap, suffix=suffix)