"""
Utility functions for dealing with spatial data
"""
import numpy as np
from sklearn.neighbors import NearestNeighbors
def bind_spatial(features, nbhd, wt_on_features=0.7):
    """
    Return a new array of form
    [wt_on_features * features / feature_norm, (1-wt_on_features) * nbhd / nbhd_norm]

    Each input is divided by its own overall norm (``np.linalg.norm`` with
    default arguments, i.e. the Frobenius norm for a 2-D matrix) so that the
    two blocks are on a comparable scale before being weighted, which makes
    ``wt_on_features`` easier to tune.

    Parameters
    ----------
    features: np.ndarray of shape (n_samples, n_features)
        Feature matrix
    nbhd: np.ndarray of shape (n_samples, n_clusters)
        Cell neighborhood composition matrix
    wt_on_features: float, default=0.7
        Weight to put on the feature matrix; the neighborhood matrix
        receives ``1 - wt_on_features``.

    Returns
    -------
    res: np.ndarray of shape (n_samples, n_features+n_clusters)
        Column-wise concatenation of the two rescaled, weighted blocks.
        A block whose norm is zero (all entries zero) is passed through
        as zeros rather than being turned into NaN by a 0/0 division.
    """
    features = np.asarray(features)
    nbhd = np.asarray(nbhd)

    # normalize two kinds of info for easier tuning of weight
    feature_norm = np.linalg.norm(features)
    nbhd_norm = np.linalg.norm(nbhd)

    # A zero norm means the block is identically zero; dividing by 1 keeps it
    # zero, whereas dividing by the norm would yield NaN everywhere.
    feature_scale = feature_norm if feature_norm > 0 else 1.0
    nbhd_scale = nbhd_norm if nbhd_norm > 0 else 1.0

    weighted_features = wt_on_features * features / feature_scale
    weighted_nbhd = (1 - wt_on_features) * nbhd / nbhd_scale
    return np.concatenate((weighted_features, weighted_nbhd), axis=1)
def get_spatial_knn_indices(locations, n_neighbors=15, method='kd_tree'):
    """
    Compute k-nearest neighbors of locations.

    The neighbor search is fitted on ``locations`` and then queried with the
    same ``locations``.

    Parameters
    ----------
    locations: np.ndarray of shape (n_samples, 2)
        Data matrix
    n_neighbors: int
        Number of nearest neighbors
    method: str, default='kd_tree'
        Method to use when computing the nearest neighbors, one of
        ['ball_tree', 'kd_tree', 'brute']

    Returns
    -------
    knn_indices: np.ndarray of shape (n_samples, n_neighbors)
        Each row represents the knn of that sample.
        NOTE: because the query points are the fitted points themselves,
        a sample is normally listed among its own neighbors (distance 0).

    Raises
    ------
    ValueError
        If ``n_neighbors`` exceeds the number of samples in ``locations``.
    """
    locations = np.asarray(locations)
    n_samples = locations.shape[0]
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if n_neighbors > n_samples:
        raise ValueError(
            f"n_neighbors ({n_neighbors}) must not exceed the number of "
            f"samples ({n_samples})"
        )

    # k-NN indices, may be asymmetric
    knn_model = NearestNeighbors(n_neighbors=n_neighbors, algorithm=method)
    knn_model.fit(locations)
    _, knn_indices = knn_model.kneighbors(locations)
    return knn_indices
def get_neighborhood_composition(knn_indices, labels, log1p=False):
    """
    Compute the composition of neighbors for each sample.

    For every sample, count how many of its listed neighbors fall into each
    cluster. Entries of ``knn_indices`` equal to -1 are treated as missing
    neighbors and skipped.

    Parameters
    ----------
    knn_indices: np.ndarray of shape (n_samples, n_neighbors)
        Each row represents the knn of that sample; values are positional
        indices into ``labels``.
    labels: np.ndarray of shape (n_samples, )
        Cluster labels
    log1p: bool, default=False
        Whether to apply log1p transformation

    Returns
    -------
    comp: np.ndarray of shape (n_samples, n_clusters)
        ``comp[i, j]`` is the number of neighbors of sample ``i`` whose label
        is the ``j``-th entry of ``np.unique(labels)`` (raw counts, not
        normalized to proportions), or ``log1p`` of that count when
        ``log1p`` is True.
    """
    knn_indices = np.asarray(knn_indices)
    n, _ = knn_indices.shape

    # `sample_cluster[s]` is the column (position in the sorted unique labels)
    # that sample `s` contributes to.
    unique_clusters, sample_cluster = np.unique(list(labels), return_inverse=True)
    sample_cluster = sample_cluster.reshape(-1)
    comp = np.zeros((n, len(unique_clusters)))

    # -1 marks a missing neighbor
    valid = knn_indices != -1
    if valid.any():
        # Boolean indexing and np.nonzero both walk the array in row-major
        # order, so row_idx[t] is the sample owning neighbor knn_indices[valid][t].
        row_idx = np.nonzero(valid)[0]
        col_idx = sample_cluster[knn_indices[valid]]
        # np.add.at accumulates repeated (row, col) pairs, unlike `comp[...] += 1`.
        np.add.at(comp, (row_idx, col_idx), 1)

    if log1p:
        comp = np.log1p(comp)
    return comp