Source code for metaspace_converter.colocalization

from typing import Optional, Tuple

import numpy as np
from anndata import AnnData
from scipy import ndimage

from metaspace_converter.anndata_to_array import anndata_to_image_array
from metaspace_converter.constants import COLOCALIZATION


[docs] def coloc_ml_preprocessing( adata: AnnData, layer: Optional[str] = "coloc_ml_preprocessing", median_filter_size: Tuple[int, int] = (3, 3), quantile_threshold: float = 0.5, ): """ Preprocessing for colocalization analysis according to the colocML publication (https://doi.org/10.1093/bioinformatics/btaa085). In the publication, the authors evaluated colocalization metrics and preprocessing approaches. They found the best performance for 1. median filtering of ion images with a (3, 3) kernel size and 2. quantile thresholding at 50%, meaning all pixels with intensities below the 50% quantile set to 0. This function performs the same preprocessing steps. Recommended for call before running the ``colocalization`` function. Args: adata: An AnnData object. layer: Key for the adata.layers dict in which the processed data will be saved. Default value is ``colocml_preprocessing``. If None, ``adata.X`` will be overwritten with the processed data. median_filter_size: 2-dimensional filter size for the median filtering that will be performed per ion image. quantile_threshold: Float between 0 and 1. The function computes the quantile value per ion image and all pixels below the quantile threshold will be set to 0. Returns: None. The processed data is saved in ``layer``. If layer is set to None, ``adata.X`` will be overwritten Raises: ValueError: If no annotations are available in ``adata.X``. """ # Extract image array from anndata: imarray = anndata_to_image_array(adata=adata) if len(imarray) == 0: raise ValueError("AnnData contains no annotations. ColocML preprocessing cannot be called.") # Median filtering imarray = ndimage.median_filter(imarray, size=(1, median_filter_size[0], median_filter_size[1])) # reshaping imarray = imarray.reshape((imarray.shape[0], -1)) # Quantile thresholding mask = imarray < np.percentile(imarray, q=quantile_threshold * 100, axis=1)[:, np.newaxis] imarray[mask] = 0 # Inserting of processed data into adata object if layer is None: adata.X = imarray.transpose() else: adata.layers[layer] = imarray.transpose()
[docs] def colocalization(adata: AnnData, layer: Optional[str] = "coloc_ml_preprocessing"): """ Colocalization of ion images using the cosine similarity metric. In combination with the ``colocML_preprocessing`` function, this metric performed best in the colocML publication (https://doi.org/10.1093/bioinformatics/btaa085). It is recommended to call the ``coloc_ml_preprocessing`` function beforehand. Args: adata: An AnnData object. layer: Key for ``adata.layer`` from which the ionimage_data for preprocessing taken. If ``None``, ``adata.X`` is used. ``coloc_ml_preprocessing`` will save the preprocessed data per default in ``adata.layer['coloc_ml_preprocessing']``. Returns: None. The processed data is saved in ``adata.varp['colocalization']``. Raises: ValueError: If layer is not found in adata.layers. """ # Select data if layer is None: data = np.array(adata.X).transpose() elif layer in adata.layers.keys(): data = np.array(adata.layers[layer]).transpose() else: raise ValueError(f"Layer `{layer}` not found in adata.layers.") # Compute colocalization coloc = _pairwise_cosine_similarity(data) # Save colocalization adata.varp[COLOCALIZATION] = coloc
def _pairwise_cosine_similarity(data: np.ndarray) -> np.ndarray: # Divide image vectors by euclidean norm norm_data = data / np.linalg.norm(data, axis=1, keepdims=True) # Compute pairwise cosine similarity similarity_matrix = np.dot(norm_data, norm_data.T) return similarity_matrix