Source code for maxfuse.spatial_utils

"""
Utility functions for dealing with spatial data
"""

import numpy as np
from sklearn.neighbors import NearestNeighbors


def bind_spatial(features, nbhd, wt_on_features=0.7):
    """
    Concatenate a feature matrix and a neighborhood matrix column-wise
    after rescaling each by its overall (Frobenius) norm and a weight.

    The result has the form
    [wt_on_features * features / feature_norm,
     (1 - wt_on_features) * nbhd / nbhd_norm].

    Parameters
    ----------
    features: array-like of shape (n_samples, n_features)
        Feature matrix
    nbhd: array-like of shape (n_samples, n_clusters)
        Cell neighborhood composition matrix
    wt_on_features: float, default=0.7
        Weight to put on the feature matrix; the neighborhood matrix
        receives weight (1 - wt_on_features).

    Returns
    -------
    res: np.ndarray of shape (n_samples, n_features+n_clusters)
        The weighted, norm-scaled blocks stacked along axis 1.
        A block that is identically zero stays zero instead of
        turning into NaN.
    """
    features = np.asarray(features)
    nbhd = np.asarray(nbhd)

    # normalize two kinds of info for easier tuning of weight
    feature_norm = np.linalg.norm(features)
    nbhd_norm = np.linalg.norm(nbhd)

    # A zero norm means the whole block is zero; dividing by it would
    # fill the output with NaN, so divide by 1 and keep the zeros.
    feature_scale = feature_norm if feature_norm > 0 else 1.0
    nbhd_scale = nbhd_norm if nbhd_norm > 0 else 1.0

    res = np.concatenate(
        (
            wt_on_features * features / feature_scale,
            (1 - wt_on_features) * nbhd / nbhd_scale,
        ),
        axis=1,
    )
    return res
def get_spatial_knn_indices(locations, n_neighbors=15, method='kd_tree'):
    """
    Compute k-nearest neighbors of locations.

    Parameters
    ----------
    locations: array-like of shape (n_samples, 2)
        Data matrix
    n_neighbors: int
        Number of nearest neighbors
    method: str, default='kd_tree'
        Method to use when computing the nearest neighbors,
        one of ['ball_tree', 'kd_tree', 'brute']

    Returns
    -------
    knn_indices: np.ndarray of shape (n_samples, n_neighbors)
        Each row represents the knn of that sample

    Raises
    ------
    ValueError
        If n_neighbors is larger than the number of samples.
    """
    locations = np.asarray(locations)
    n_samples = locations.shape[0]
    # Explicit check rather than `assert`, which is removed under `python -O`.
    if n_neighbors > n_samples:
        raise ValueError(
            f"n_neighbors ({n_neighbors}) must not exceed the number of "
            f"samples ({n_samples})"
        )

    # k-NN indices, may be asymmetric.
    # The fitted points are also passed as the query set.
    knn_model = NearestNeighbors(
        n_neighbors=n_neighbors, algorithm=method
    ).fit(locations)
    _, knn_indices = knn_model.kneighbors(locations)
    return knn_indices
def get_neighborhood_composition(knn_indices, labels, log1p=False):
    """
    Count, for each sample, how many of its neighbors fall in each cluster.

    Parameters
    ----------
    knn_indices: array-like of shape (n_samples, n_neighbors)
        Each row represents the knn of that sample.
        Entries equal to -1 are treated as missing neighbors and skipped.
    labels: array-like of shape (n_samples, )
        Cluster labels
    log1p: bool, default=False
        Whether to apply log1p transformation

    Returns
    -------
    comp: np.ndarray of shape (n_samples, n_clusters)
        comp[i, j] is the number of neighbors of sample i whose label is
        the j-th distinct label (distinct labels in the sorted order
        returned by np.unique). These are raw counts, not proportions;
        with log1p=True the counts are transformed by log(1 + count).
    """
    knn_indices = np.asarray(knn_indices)
    n_samples = knn_indices.shape[0]

    # Encode every label as its position among the sorted distinct labels.
    unique_clusters, cluster_codes = np.unique(
        list(labels), return_inverse=True
    )
    cluster_codes = cluster_codes.ravel()
    n_clusters = len(unique_clusters)

    # Boolean-mask indexing and np.nonzero both walk the array in row-major
    # order, so `rows` and `cols` line up element by element.
    valid = knn_indices != -1
    rows = np.nonzero(valid)[0]
    cols = cluster_codes[knn_indices[valid]]

    comp = np.zeros((n_samples, n_clusters))
    # np.add.at accumulates repeated (row, col) pairs, unlike `comp[...] += 1`.
    np.add.at(comp, (rows, cols), 1)

    if log1p:
        comp = np.log1p(comp)
    return comp