"""
Nonlinear Sammon Mapping, as proposed in:
Sammon, J. W. (1969). A nonlinear mapping for data structure analysis. IEEE Transactions on computers, 100(5), 401-409.
"""
import numpy as np
from scipy.spatial.distance import cdist
from numba import jit
from ._optim import gradient_descent_line_search
import inspect
[docs]
class Sammon():
def __init__(
self,
n_dims = 2,
n_iter = 2000,
n_iter_check = 50,
init = None,
verbose = 0,
input_type = 'distance',
max_halves = 5,
tol = 1e-3,
n_inits = 1,
step_size = 1
):
self.n_dims = n_dims
self.n_iter = n_iter
self.n_iter_check = n_iter_check
self.init = init
self.verbose = verbose
self.input_type = input_type
self.max_halves = max_halves
self.tol = tol
self.n_inits = n_inits
self.step_size = step_size
self.method_str = "SAMMON"
[docs]
def __str__(self):
"""Return a string representation of the Sammon instance with input_type and user-modified parameters."""
# Get the signature of the __init__ method
signature = inspect.signature(self.__init__)
# Get the default values of the parameters from the __init__ method
defaults = {k: v.default for k, v in signature.parameters.items() if v.default is not inspect.Parameter.empty}
modified_params = []
# Always show the 'input_type' parameter
result = f"Sammon(input_type={self.input_type}"
# Check if any current attributes differ from their default values and append them
for param, default_value in defaults.items():
current_value = getattr(self, param)
if current_value != default_value and param != "input_type":
modified_params.append(f"{param}={current_value}")
# If there are modified parameters, append them to the result
if modified_params:
result += ", " + ", ".join(modified_params)
result += ")"
return result
[docs]
def fit(self, X):
"""Fit the Sammon model to the input data.
Parameters
----------
X : np.array of shape (n_samples, n_features) or (n_samples, n_samples)
The input data. If `input_type` is 'vector', `X` should be the feature
vectors of the samples. If `input_type` is 'distance', `X` should be
the pairwise distance matrix.
Returns
-------
self : object
Returns the instance of the Sammon class with the configuration matrix
`Y_` stored as an attribute.
"""
self.fit_transform(X)
return self
[docs]
def _sammon_stress_function(positions, disparities, compute_error = True, compute_grad = True):
"""Compute the Sammon stress function and its gradient.
The Sammon stress function measures the discrepancy between the input distances
(disparities) and the distances among the estimated positions in the reduced space.
Optionally, it can also compute the gradient of the stress function for optimization purposes.
Parameters
----------
positions : ndarray of shape (n_samples, n_dims)
The estimated positions of the samples in the reduced-dimensional space.
disparities : ndarray of shape (n_samples, n_samples)
The input distance (or dissimilarity) matrix.
compute_error : bool, optional
Whether to compute the stress (error) value, by default True.
compute_grad : bool, optional
Whether to compute the gradient of the stress function, by default True.
Returns
-------
float or None
The Sammon stress value (cost), or None if `compute_error` is False.
ndarray or None
The gradient of the stress function, or None if `compute_grad` is False.
"""
distances = cdist(positions, positions, metric = 'euclidean')
if compute_error:
n_samples = positions.shape[0]
D_map = distances.copy()
D_map = D_map[np.triu_indices(n_samples, k = 1)]
D_data = disparities[np.triu_indices(n_samples, k = 1)]
delta = D_data - D_map
D_data_inv = 1. / D_data
cost = np.sum((delta**2) * D_data_inv) / np.sum(D_data)
else:
cost = None
if compute_grad:
grad = _sammon_stress_gradient(positions, distances, disparities)
# Need to reshape gradient. Raveled for speed optim.
grad = np.reshape(grad, (-1,positions.shape[1]),order='F')
else:
grad = None
return cost, grad
[docs]
@jit(nopython=True)
def _sammon_stress_gradient(Y, D_map, D):
"""Compute the gradient of the Sammon stress function.
Parameters
----------
Y : ndarray of shape (n_samples, n_dims)
The current positions of the samples in the reduced-dimensional space.
D_map : ndarray of shape (n_samples, n_samples)
The pairwise Euclidean distances between the estimated positions.
D : ndarray of shape (n_samples, n_samples)
The input distance (or dissimilarity) matrix.
Returns
-------
ndarray of shape (n_samples * n_dims)
The computed gradient of the Sammon stress function, flattened for optimization.
"""
n_samples = Y.shape[0]
n_dims = Y.shape[1]
D_map = D_map + np.eye(n_samples)
D_map_inv = 1. / D_map
D_inv = 1. / D
delta = D_map_inv - D_inv
one = np.ones((n_samples,n_dims))
deltaone = np.dot(delta,one) # sum of distances, (reapated n_dims times)
dY = np.dot(delta,Y) - (Y * deltaone)
dinv3 = D_map_inv ** 3
y2 = Y ** 2
H = np.dot(dinv3,y2) - deltaone - 2*Y * np.dot(dinv3,Y) + y2 * np.dot(dinv3,one)
iY = dY.transpose().flatten() / np.abs(H.transpose().flatten()) # Gradient / Hessian
return iY