Source code for metaspace_converter.to_spatialdata

import warnings
from typing import Optional

import numpy as np
import pandas as pd
from anndata import AnnData
from metaspace import SMInstance
from metaspace.sm_annotation_utils import SMDataset
from spatial_image import SpatialImage
from spatialdata import SpatialData
from spatialdata.models import Image2DModel, PointsModel, TableModel
from spatialdata.transformations import Affine, Scale, Sequence as SequenceTransform, Translation

from metaspace_converter.constants import (
    COL,
    COORD_SYS_GLOBAL,
    INSTANCE_KEY,
    MICROMETER,
    OPTICAL_IMAGE_KEY,
    POINTS_KEY,
    REGION_KEY,
    XY,
    YX,
    YXC,
    X,
    Y,
)
from metaspace_converter.to_anndata import metaspace_to_anndata
from metaspace_converter.utils import has_optical_image


[docs] def metaspace_to_spatialdata( dataset: Optional[SMDataset] = None, dataset_id: Optional[str] = None, database: Optional[tuple[str, str]] = None, fdr: float = 0.1, use_tic: bool = False, metadata_as_obs: bool = False, add_optical_image: bool = True, optical_name_added: str = OPTICAL_IMAGE_KEY, add_points: bool = True, points_name_added: str = POINTS_KEY, sm: Optional[SMInstance] = None, **annotation_filter, ) -> SpatialData: """ Download a METASPACE dataset as a :py:class:`SpatialData` object. See: https://metaspace2020.eu/about Args: dataset: A METASPACE dataset instance. If not provided, the ``dataset_id`` must be given. dataset_id: The unique ID of a dataset on METASPACE, e.g. "2021-09-03_11h43m13s" database: A single METASPACE database given as a tuple of name and version. Usually it is displayed on METASPACE as "HMDB – v4" which corresponds to ``("HMDB", "v4")``. fdr: Returns only annotations for which the false discovery rate is less or equal to this limit. use_tic: When True, the output values will be scaled by the total ion count per pixel and will be in 0.0 to 1.0 range. metadata_as_obs: Whether to store metadata in the ``obs`` dataframe instead of ``uns``. For a single METASPACE dataset, metadata is the same for all pixels, so it would be duplicated for all ``obs``. When combining multiple datasets, it would be preserved in ``obs`` but not in ``uns``. add_optical_image: Whether to also add the optical image (if one exists) to SpatialData images. If none exists, it is not added, and no error raised. optical_name_added: Name of the element where to store the image in the SpatialData object add_points: Whether to also add ion image pixel coordinates as SpatialData points. This allows to spatially visualize the ion image values. If False, only ion intensities are added to the table and coordinates are added to ``obs``. points_name_added: Name of the element where to store the points in the SpatialData object sm: Optionally a cached SMInstance annotation_filter: Additional keyword arguments passed to the METASPACE API. Returns: A :py:class:`SpatialData` object with * ion intensities and metadata: ``.table`` * optical image: ``.images["optical_image"]`` (or ``optical_name_added``) * sampling coordinates: ``.points["maldi_points"]`` (or ``points_name_added``) in a coordinate system relative to the top-left image corner and physical scale (micrometers) Raises: ValueError: If something is wrong with the input data or parameters metaspace.sm_annotation_utils.DatasetNotFound: If the dataset ID is not found ConnectionError: For any other network connectivity problems """ if dataset is None: if sm is None: sm = SMInstance() dataset = sm.dataset(id=dataset_id) # Create table adata = metaspace_to_anndata( dataset=dataset, database=database, fdr=fdr, use_tic=use_tic, metadata_as_obs=metadata_as_obs, add_optical_image=False, sm=sm, **annotation_filter, ) table = _create_spatialdata_table(adata) sdata = SpatialData(table=table) # Create image if has_optical_image(dataset) and add_optical_image: optical_image = optical_image_to_spatial_image(dataset=dataset) sdata.images[optical_name_added] = optical_image # Create points if add_points: points = _create_points(dataset, adata) sdata.points[points_name_added] = points return sdata
def _is_matrix_homogeneous(transform_matrix_2d: np.ndarray) -> bool: # In an n×n matrix, the last row (except last element) should be zeros. actual = transform_matrix_2d[-1, :-1] expected = np.zeros(transform_matrix_2d.shape[1] - 1) return np.array_equal(actual, expected)
[docs] def optical_image_to_spatial_image(dataset: SMDataset) -> Optional[SpatialImage]: """ Download the optical image of a METASPACE dataset to a SpatialImage object. This function requires that the dataset has an optical image. Otherwise, an error is raised. Args: dataset: A METASPACE dataset Returns: A SpatialImage object with the image's transformation to world coordinates """ optical_images = dataset.optical_images() image_yx_rgb = optical_images[0] # Transformation from optical image to ion image coordinate system (inverse of ``_transforms``) matrix_xy = optical_images._itransforms[0] if _is_matrix_homogeneous(matrix_xy): affine_matrix_xy = matrix_xy else: warnings.warn( "The METASPACE optical image has a projective transformation, but " "SpatialData only supports affine transformations. " "Using an approximated affine." ) affine_matrix_xy = matrix_xy.copy() affine_matrix_xy[-1, :-1] = 0.0 optical_to_ion_image = Affine(matrix=affine_matrix_xy, input_axes=XY, output_axes=XY) # Transformation from ion image to world coordinate system ion_image_to_global = get_ion_image_to_physical_transformation(dataset) pixel_corner_to_center = Translation(translation=[-0.5, -0.5], axes=YX) optical_to_global = SequenceTransform( [optical_to_ion_image, pixel_corner_to_center, ion_image_to_global] ).to_affine(input_axes=YX, output_axes=YX) return Image2DModel.parse( image_yx_rgb, transformations={COORD_SYS_GLOBAL: optical_to_global}, dims=YXC, scale_factors=(2, 2, 2), axis_units={Y: MICROMETER, X: MICROMETER}, c_coords=("r", "g", "b"), # rgb=True, )
def _create_spatialdata_table(adata: AnnData) -> AnnData: # Add the index as a separate column for matching rows to regions in labels or points. adata.obs[INSTANCE_KEY] = adata.obs.index.astype(int) # Avoid same name for index and a column name, which is ambiguous and causes warning or error. adata.obs.index.name = "index" adata.obs[REGION_KEY] = pd.Series( [POINTS_KEY] * adata.n_obs, index=adata.obs.index, dtype="category" ) return TableModel.parse( adata, region=[POINTS_KEY], # element in SpatialData to be used region_key=REGION_KEY, # table column containing the regions instance_key=INSTANCE_KEY, # table column to be matched to points index ) def _create_points(dataset: SMDataset, adata: AnnData) -> pd.DataFrame: # FIXME: PointsModel does not yet allow to specify units, we should set micrometers as in image. # FIXME: Workaround for that napari-spatialdata does not allow to visualize values from ``X`` # array, only from ``obs``. For now, we copy X to ``obs`` to enable visualization. # points = PointsModel.parse( # adata.obs[[X, Y]].reset_index(), # transformations={COORD_SYS_GLOBAL: get_ion_image_to_physical_transformation(dataset)}, # ) return PointsModel.parse( pd.concat( [ adata.obs[[COL.ion_image_pixel_x, COL.ion_image_pixel_y]].reset_index(), pd.DataFrame(adata.X, columns=adata.var.index), ], axis=1, ), coordinates={"x": COL.ion_image_pixel_x, "y": COL.ion_image_pixel_y}, transformations={COORD_SYS_GLOBAL: get_ion_image_to_physical_transformation(dataset)}, )
[docs] def get_ion_image_to_physical_transformation(dataset: SMDataset) -> Scale: """ Get the transformation from the ion image (pixels) to the world coordinate system (micrometers). Args: dataset: A METASPACE dataset Returns: A scale SpatialData transformation """ ion_image_pixel_size = dataset.metadata["MS_Analysis"]["Pixel_Size"] scale_factors_xy = [ion_image_pixel_size["Xaxis"], ion_image_pixel_size["Yaxis"]] return Scale(scale=scale_factors_xy, axes=XY)