Source code for zedprofiler.IO.loading_classes

"""Data-loading classes for featurization workflows."""

from __future__ import annotations

import logging
import pathlib
from collections.abc import Iterator
from dataclasses import dataclass

import bioio
import numpy
from beartype import beartype

from zedprofiler.contracts import ImageArrayModel

# Configure the root logger at INFO level as a side effect of importing this
# module (basicConfig does nothing if the root logger already has handlers).
# NOTE(review): libraries usually leave logging configuration to the
# application — confirm that configuring it here is intentional.
logging.basicConfig(level=logging.INFO)


@beartype
def _image_loading(image_path: pathlib.Path) -> numpy.ndarray:
    """
    Read one image from disk through the bioio backend.

    Parameters
    ----------
    image_path : pathlib.Path
        Location of the image file to read.

    Returns
    -------
    numpy.ndarray
        Image data requested from bioio with the dimension order ``"ZYX"``.
    """
    # bioio is handed a plain string path; BioImage selects the first scene
    # found in the file.
    reader = bioio.BioImage(str(image_path))
    zyx_data = reader.get_image_data("ZYX")
    return zyx_data


@dataclass
class ImageSetConfig:
    """Configuration options for ImageSetLoader.

    Attributes
    ----------
    image_set_name : str | None
        Optional name of the image set.
    label_key_name : list[str] | None
        Keys under which a label array is stored. ``None`` is replaced by an
        empty list during initialization.
    raw_image_key_name : list[str] | None
        Keys under which a raw image array is stored. ``None`` is replaced by
        an empty list during initialization.

    Raises
    ------
    TypeError
        If ``image_set_name`` is neither a string nor ``None``, or if either
        key-name field is neither ``None`` nor a list containing only strings.
    """

    image_set_name: str | None = None
    label_key_name: list[str] | None = None
    raw_image_key_name: list[str] | None = None

    def __post_init__(self) -> None:
        """Validate argument types and replace ``None`` key lists with ``[]``."""
        if not isinstance(self.image_set_name, (str, type(None))):
            raise TypeError("image_set_name must be a string or None")
        self.label_key_name = self._checked_key_list(
            self.label_key_name, "label_key_name"
        )
        self.raw_image_key_name = self._checked_key_list(
            self.raw_image_key_name, "raw_image_key_name"
        )

    @staticmethod
    def _checked_key_list(value: list[str] | None, field_name: str) -> list[str]:
        """Return ``value`` as a validated list of strings (``[]`` for ``None``)."""
        if value is None:
            return []
        # The error message promises "a list of strings", so the elements are
        # checked as well as the container type.
        if not isinstance(value, list) or not all(
            isinstance(item, str) for item in value
        ):
            raise TypeError(f"{field_name} must be a list of strings or None")
        return value
class _LazyImageSetDict(dict[str, pathlib.Path | numpy.ndarray]): """Dictionary that loads image arrays on first access.""" def __getitem__(self, key: str) -> numpy.ndarray: value = super().__getitem__(key) if isinstance(value, pathlib.Path): value = _image_loading(value) super().__setitem__(key, value) return value def get( self, key: str, default: pathlib.Path | numpy.ndarray | None = None, ) -> pathlib.Path | numpy.ndarray | None: if key in self: return self[key] return default def items(self) -> Iterator[tuple[str, numpy.ndarray]]: for key in dict.__iter__(self): yield key, self[key] def values(self) -> Iterator[numpy.ndarray]: for key in dict.__iter__(self): yield self[key]
class ImageSetLoader:
    """
    Load an image set of raw z-stack images and segmentation labels.

    An image set in this context is a set of images that can be related to
    each other via their metadata, for example all images coming from the
    same well, FOV or timepoint but from different spectral channels, plus
    their segmentation labels.

    Images are registered in ``image_set_dict`` under their channel key.
    Entries that come from directories are stored as file paths; entries that
    come from in-memory arrays are stored as arrays.

    Parameters
    ----------
    anisotropy_spacing : tuple
        The anisotropy spacing of the images in format
        (z_spacing, y_spacing, x_spacing).
    channel_mapping : dict
        A dictionary mapping channel names to a token found in the matching
        image file names. Example:
        ``{'nuclei': 'nuclei_', 'cell': 'cell_', 'cytoplasm': 'cytoplasm_'}``
    image_set_path : pathlib.Path | None
        Path to the image set directory, or None when ``image_set_array`` is
        used instead.
    label_set_path : pathlib.Path | None
        Path to the label set directory, or None when ``label_set_array`` is
        used instead.
    image_set_array : numpy.ndarray | None
        In-memory raw image, stored under every key in
        ``config.raw_image_key_name``.
    label_set_array : numpy.ndarray | None
        In-memory label image, stored under every key in
        ``config.label_key_name``.
    config : ImageSetConfig | None
        Optional configuration; defaults are used when None.

    Attributes
    ----------
    image_set_name : str | None
        The name of the image set, taken from the configuration.
    anisotropy_spacing : tuple
        The anisotropy spacing of the images.
    anisotropy_factor : float
        Ratio of z-spacing to y-spacing.
    label_set_path : pathlib.Path | None
        The label directory that was passed in.
    image_set_dict : dict
        The registered images, keyed by channel name.
    compartments : list
        Keys of ``image_set_dict`` that were registered from a label source.
        A compartment is a segmented region in the image (e.g., Cell,
        Cytoplasm, Nuclei, Organoid); compartments are bounds for
        measurements.
    image_names : list
        Keys of ``image_set_dict`` that are not compartments.
    unique_compartment_objects : dict
        For each compartment, the list of non-zero object IDs found in its
        label image.
    """

    def __init__(  # noqa: PLR0913
        self,
        anisotropy_spacing: tuple[float, float, float],
        channel_mapping: dict[str, str],
        image_set_path: pathlib.Path | None,
        label_set_path: pathlib.Path | None,
        image_set_array: numpy.ndarray | None = None,
        label_set_array: numpy.ndarray | None = None,
        config: ImageSetConfig | None = None,
    ) -> None:
        """Initialize the ImageSetLoader with sources, spacing, and mapping.

        Parameters
        ----------
        anisotropy_spacing : tuple
            The anisotropy spacing of the images.
            In format (z_spacing, y_spacing, x_spacing).
        channel_mapping : dict
            A dictionary mapping channel names to image file name tokens.
        image_set_path : pathlib.Path | None
            Path to the image set directory.
        label_set_path : pathlib.Path | None
            Path to the label set directory.
        image_set_array : numpy.ndarray | None
            Array containing the image data.
        label_set_array : numpy.ndarray | None
            Array containing the label data.
        config : ImageSetConfig | None
            Optional configuration object with image_set_name,
            label_key_name, and raw_image_key_name.
            If None, defaults are used.

        Raises
        ------
        ValueError
            If, for images or for labels, neither or both of the path and
            the array are provided.
        """
        config = config or ImageSetConfig()
        self._validate_input_sources(
            image_set_path=image_set_path,
            label_set_path=label_set_path,
            image_set_array=image_set_array,
            label_set_array=label_set_array,
        )
        self.image_set_dict = _LazyImageSetDict()
        # Keys registered from a label source; get_compartments relies on this
        # to tell label images apart from raw images.
        self._label_keys: set[str] = set()
        channel_tokens = [str(value) for value in channel_mapping.values()]
        self.anisotropy_spacing = anisotropy_spacing
        self.anisotropy_factor = self.get_anisotropy()
        self.image_set_name = config.image_set_name
        self.label_set_path = label_set_path
        self._load_path_based_images(
            channel_mapping=channel_mapping,
            channel_tokens=channel_tokens,
            image_set_path=image_set_path,
            label_set_path=label_set_path,
        )
        self._load_array_based_images(
            config=config,
            image_set_array=image_set_array,
            label_set_array=label_set_array,
        )
        self.get_compartments()
        self.get_image_names()
        self.get_unique_objects_in_compartments()

    @staticmethod
    def _validate_input_sources(
        image_set_path: pathlib.Path | None,
        label_set_path: pathlib.Path | None,
        image_set_array: numpy.ndarray | None,
        label_set_array: numpy.ndarray | None,
    ) -> None:
        """
        Check that images and labels each come from exactly one source.

        Parameters
        ----------
        image_set_path : pathlib.Path | None
            Path to the image set directory.
        label_set_path : pathlib.Path | None
            Path to the label set directory.
        image_set_array : numpy.ndarray | None
            Array containing the image data.
        label_set_array : numpy.ndarray | None
            Array containing the label data.

        Raises
        ------
        ValueError
            If neither image_set_array nor image_set_path is provided,
            or if neither label_set_array nor label_set_path is provided.
        ValueError
            If both image_set_array and image_set_path are provided,
            or if both label_set_array and label_set_path are provided.
        """
        if image_set_array is None and image_set_path is None:
            raise ValueError(
                "Either image_set_array or image_set_path must be provided."
            )
        if label_set_array is None and label_set_path is None:
            raise ValueError(
                "Either label_set_array or label_set_path must be provided."
            )
        if image_set_array is not None and image_set_path is not None:
            raise ValueError(
                "Only one of image_set_array or image_set_path should be "
                "provided, not both."
            )
        if label_set_array is not None and label_set_path is not None:
            raise ValueError(
                "Only one of label_set_array or label_set_path should be "
                "provided, not both."
            )

    def _store_image(
        self,
        key: str,
        image: pathlib.Path | numpy.ndarray,
        *,
        is_label: bool,
    ) -> None:
        """Register ``image`` under ``key`` and record whether it is a label."""
        self.image_set_dict[key] = image
        if is_label:
            self._label_keys.add(key)
        else:
            # A raw image replacing an earlier label entry is no longer a label.
            self._label_keys.discard(key)

    def _register_tiff_files(
        self,
        directory: pathlib.Path,
        channel_mapping: dict[str, str],
        channel_tokens: list[str],
        *,
        is_label: bool,
    ) -> None:
        """Register the TIFF files of ``directory`` under the keys they match."""
        for file_path in sorted(directory.glob("*")):
            if file_path.suffix not in (".tif", ".tiff"):
                continue
            if not any(token in file_path.name for token in channel_tokens):
                continue
            # Files are visited in sorted order, so when several files match
            # the same key the last one wins.
            for key, value in channel_mapping.items():
                if str(value) in file_path.name:
                    self._store_image(key, file_path, is_label=is_label)

    def _load_path_based_images(
        self,
        channel_mapping: dict[str, str],
        channel_tokens: list[str],
        image_set_path: pathlib.Path | None,
        label_set_path: pathlib.Path | None,
    ) -> None:
        """
        Register image and label files found in the given directories.

        Only ``.tif`` / ``.tiff`` files are considered. Only the file paths
        are stored here. Raw images are registered before labels, so a label
        file wins when both match the same key.

        Parameters
        ----------
        channel_mapping : dict[str, str]
            A dictionary mapping channel names to image file name tokens.
        channel_tokens : list[str]
            A list of tokens to look for in file names to identify channels.
        image_set_path : pathlib.Path | None
            Path to the image set directory.
        label_set_path : pathlib.Path | None
            Path to the label set directory.
        """
        # Each directory is handled on its own so that label files are still
        # registered when the raw images are supplied as an array.
        if image_set_path is not None:
            self._register_tiff_files(
                image_set_path, channel_mapping, channel_tokens, is_label=False
            )
        if label_set_path is not None:
            self._register_tiff_files(
                label_set_path, channel_mapping, channel_tokens, is_label=True
            )

    def _load_array_based_images(
        self,
        config: ImageSetConfig,
        image_set_array: numpy.ndarray | None,
        label_set_array: numpy.ndarray | None,
    ) -> None:
        """
        Register in-memory arrays under the keys named in ``config``.

        Parameters
        ----------
        config : ImageSetConfig
            Configuration object containing key names for images and labels.
        image_set_array : numpy.ndarray | None
            Array containing the image data.
        label_set_array : numpy.ndarray | None
            Array containing the label data.
        """
        sources = (
            (image_set_array, config.raw_image_key_name, False, "raw_image_key_name"),
            (label_set_array, config.label_key_name, True, "label_key_name"),
        )
        for array, keys, is_label, option_name in sources:
            if array is None:
                continue
            if not keys:
                # Without a key the array cannot be stored; say so instead of
                # dropping it silently.
                logging.getLogger(__name__).warning(
                    "An array was provided but config.%s is empty; "
                    "the array is not stored.",
                    option_name,
                )
                continue
            for key in keys:
                # Run through pydantic validation to ensure the array is valid.
                validated_array = ImageArrayModel(array=array).array
                self._store_image(key, validated_array, is_label=is_label)

    def get_unique_objects_in_compartments(self) -> None:
        """
        Populate ``unique_compartment_objects`` with the IDs of each compartment.

        For every compartment the unique values of its label image are
        collected and the background value 0 is removed. With no compartments
        the result is an empty dictionary.
        """
        self.unique_compartment_objects = {}
        if not self.compartments:
            # Nothing to measure; keep ``compartments`` an (empty) list so
            # callers can still iterate over it.
            self.compartments = []
            return
        for compartment in self.compartments:
            label_values = numpy.unique(self.get_image(compartment))
            # remove the 0 label
            self.unique_compartment_objects[compartment] = list(
                label_values[label_values != 0]
            )

    def get_image(self, key: str) -> numpy.ndarray:
        """Return an image array for a given key.

        Parameters
        ----------
        key : str
            Channel or label key.

        Returns
        -------
        numpy.ndarray
            Image array for the requested key.
        """
        return self.image_set_dict[key]

    def get_image_names(self) -> list[str]:
        """Populate image (non-compartment) names.

        Returns
        -------
        list[str]
            List of image names excluding compartment labels.
        """
        compartments = self.compartments if isinstance(self.compartments, list) else []
        self.image_names = [x for x in self.image_set_dict if x not in compartments]
        return self.image_names

    def get_compartments(self) -> list[str]:
        """Populate compartment names from the registered label keys.

        Returns
        -------
        list[str]
            Keys of ``image_set_dict`` that were registered from a label
            source, in registration order.
        """
        # Matching keys against themselves would mark every image as a
        # compartment, so the label keys recorded at load time are used.
        self.compartments = [
            key for key in self.image_set_dict if key in self._label_keys
        ]
        return self.compartments

    def get_anisotropy(self) -> float:
        """Return the anisotropy factor for the image set.

        Returns
        -------
        float
            Ratio of z-spacing to y-spacing.
        """
        return self.anisotropy_spacing[0] / self.anisotropy_spacing[1]
class ObjectLoader:
    """
    Pair one channel image with the label image of one compartment.

    An object is a segmented region in the label image. This could be a
    cell, a nucleus, or any other segmented compartment.

    Parameters
    ----------
    image_set_loader : ImageSetLoader
        An instance of the ImageSetLoader class containing the image set.
    channel_name : str
        The name of the channel from which the objects are extracted.
    compartment_name : str
        The name of the compartment from which the objects are extracted.

    Attributes
    ----------
    channel : str
        The channel name that was passed in.
    compartment : str
        The compartment name that was passed in.
    image : numpy.ndarray | None
        The image for ``channel``, or None when ``channel`` is empty.
    label_image : numpy.ndarray | None
        The label image for ``compartment``, or None when ``compartment`` is
        empty.
    object_ids : list
        The non-zero unique values of ``label_image``; empty when there is no
        label image.
    image_set_loader : ImageSetLoader
        The image set loader that was passed in.
    """

    def __init__(
        self,
        image_set_loader: ImageSetLoader,
        channel_name: str,
        compartment_name: str,
    ) -> None:
        """Initialize object loader with image and labels.

        Parameters
        ----------
        image_set_loader : ImageSetLoader
            An instance of the ImageSetLoader class containing the image set.
        channel_name : str
            The name of the channel from which the objects are extracted.
        compartment_name : str
            The name of the compartment from which the objects are extracted.
        """
        self.channel = channel_name
        self.compartment = compartment_name
        self.image = image_set_loader.get_image(self.channel) if self.channel else None
        self.label_image = (
            image_set_loader.get_image(self.compartment) if self.compartment else None
        )
        if self.label_image is None:
            # numpy.unique(None) would report a single bogus "None" object.
            self.object_ids = []
        else:
            # get the labeled image objects and drop the 0 label
            self.object_ids = [x for x in numpy.unique(self.label_image) if x != 0]
        # keep a handle on the image set loader
        self.image_set_loader = image_set_loader
class TwoObjectLoader:
    """
    Bundle two channel images with the label image of one compartment.

    Intended for analyses that compare two channels inside the same
    segmented objects, such as co-localization.

    Parameters
    ----------
    image_set_loader : ImageSetLoader
        An instance of the ImageSetLoader class containing the image set.
    compartment : str
        The name of the compartment for which the label image is loaded.
    channel1 : str
        The name of the first channel to be loaded.
    channel2 : str
        The name of the second channel to be loaded.

    Attributes
    ----------
    image_set_loader : ImageSetLoader
        The image set loader that was passed in.
    compartment : str
        The compartment name that was passed in.
    label_image : numpy.ndarray
        The image the loader returns for ``compartment``.
    image1 : numpy.ndarray
        The image the loader returns for ``channel1``.
    image2 : numpy.ndarray
        The image the loader returns for ``channel2``.
    object_ids
        The loader's ``unique_compartment_objects`` entry for ``compartment``.
    image_set_name
        The loader's ``image_set_name``.
    """

    def __init__(
        self,
        image_set_loader: ImageSetLoader,
        compartment: str,
        channel1: str,
        channel2: str,
    ) -> None:
        """Initialize a two-channel loader for a compartment.

        Parameters
        ----------
        image_set_loader : ImageSetLoader
            Image set loader containing images and labels.
        compartment : str
            Compartment name for the label image.
        channel1 : str
            First channel name to load.
        channel2 : str
            Second channel name to load.
        """
        loader = image_set_loader
        self.image_set_loader = loader
        self.compartment = compartment
        # Fetch in the order label, channel 1, channel 2.
        self.label_image, self.image1, self.image2 = (
            loader.get_image(name) for name in (compartment, channel1, channel2)
        )
        self.object_ids = loader.unique_compartment_objects[compartment]
        # keep the image set name for downstream use
        self.image_set_name = loader.image_set_name