Source code for zedprofiler.featurization.texture

"""This module generates texture features for each object in the
image using Haralick features.

We do this in as close to a zero-copy way as possible.
We want to make this module fast, memory efficient, and robust to large images
and objects.
We want this module to be callable through a Python API and to scale well.
"""

import mahotas
import numpy
import pandas
import skimage
import skimage.measure

from zedprofiler.contracts import validate_column_name_schema
from zedprofiler.IO.feature_writing_utils import format_morphology_feature_name
from zedprofiler.IO.loading_classes import ObjectLoader


def scale_image(image: numpy.ndarray, num_gray_levels: int = 256) -> numpy.ndarray:
    """
    Rescale the intensities of an image onto a fixed number of gray levels.

    The rescaling is relative to the image's own minimum and maximum
    (``in_range="image"``): the smallest pixel value is mapped to the bottom
    of the output range and the largest to the top.

    Example: with ``num_gray_levels=256``, an image whose values span
    0..1023 has 0 mapped to 0 and 1023 mapped to 255.

    Parameters
    ----------
    image : numpy.ndarray
        The input image to be scaled. Can be a ndarray of any shape.
    num_gray_levels : int, optional
        The number of gray levels to scale the image to, by default 256.
        Supported values are 256 (``uint8``) and 65536 (``uint16``).

    Returns
    -------
    numpy.ndarray
        The gray level scaled image, with the same shape as ``image``.

    Raises
    ------
    ValueError
        If ``num_gray_levels`` is not one of the supported values.
    """
    outrange_mapping = {
        256: "uint8",
        65536: "uint16",
    }
    # dict.get returns None for a missing key (it never raises KeyError),
    # so a single None check covers the unsupported case.
    out_range = outrange_mapping.get(num_gray_levels)
    if out_range is None:
        raise ValueError(
            f"Unsupported num_gray_levels: {num_gray_levels}. "
            f"Supported values are: {list(outrange_mapping.keys())}"
        )

    # The file only imports ``skimage`` and ``skimage.measure`` at module
    # level; import the submodule used here explicitly so the call does not
    # depend on scikit-image's lazy submodule loading.
    import skimage.exposure

    # scale the image to the requested gray levels
    return skimage.exposure.rescale_intensity(
        image,
        in_range="image",
        out_range=out_range,
    )
def compute_texture(  # noqa: C901
    object_loader: ObjectLoader,
    distance: int = 1,
    grayscale: int = 256,
) -> pandas.DataFrame:
    """
    Calculate Haralick texture features for each object in the image.

    Each object is cropped to its bounding box, pixels outside the object
    are set to zero, the crop is rescaled to ``grayscale`` gray levels, and
    the 13 Haralick features are computed for every co-occurrence direction.
    Features are reported per direction (they are not averaged).

    Parameters
    ----------
    object_loader : ObjectLoader
        The object loader containing the image and object information.
        This function reads its ``image``, ``label_image``, ``object_ids``,
        ``compartment``, ``channel`` and ``image_set_loader.image_set_name``
        attributes. The label image is assumed to be 3D (z, y, x), since the
        bounding boxes are unpacked as six values.
    distance : int, optional
        The distance parameter for Haralick features, by default 1
    grayscale : int, optional
        The number of gray levels to scale the image to, by default 256

    Returns
    -------
    pandas.DataFrame
        One row per processed object, with the columns:

        - Metadata_Experiment_ImageSet
        - Metadata_Object_ObjectID
        - one column per texture measurement, whose raw measurement name is
          ``{feature}-{distance}-{direction:02d}-{grayscale}`` and which is
          formatted with ``format_morphology_feature_name``.

        Objects with id 0, objects without a bounding box in the label
        image, and objects with an empty mask are skipped and get no row.
        Objects for which mahotas cannot compute the features get NaN.

        Texture feature names:

        - AngularSecondMoment
        - Contrast
        - Correlation
        - Variance
        - InverseDifferenceMoment
        - SumAverage
        - SumVariance
        - SumEntropy
        - Entropy
        - DifferenceVariance
        - DifferenceEntropy
        - InformationMeasureOfCorrelation1
        - InformationMeasureOfCorrelation2

    Raises
    ------
    ValueError
        If ``grayscale`` is not supported by ``scale_image`` or if a
        resulting column name does not conform to the column name schema.
    """
    label_object = object_loader.label_image
    labels = object_loader.object_ids
    feature_names = [
        "AngularSecondMoment",
        "Contrast",
        "Correlation",
        "Variance",
        "InverseDifferenceMoment",
        "SumAverage",
        "SumVariance",
        "SumEntropy",
        "Entropy",
        "DifferenceVariance",
        "DifferenceEntropy",
        "InformationMeasureOfCorrelation1",
        "InformationMeasureOfCorrelation2",
    ]
    # Number of co-occurrence directions used for the NaN fallback below;
    # 13 is the direction count for a 3D volume.
    n_directions = 13

    output_texture_dict = {
        "Metadata_Object_ObjectID": [],
        "texture_name": [],
        "texture_value": [],
    }

    # Precompute bboxes for labeled regions to avoid per-object full-array copies.
    props = skimage.measure.regionprops_table(
        label_object,
        properties=["label", "bbox"],
    )
    # Map label id to bbox (z0, y0, x0, z1, y1, x1)
    label_to_bbox = {
        int(lbl): tuple(int(props[f"bbox-{axis}"][i]) for axis in range(6))
        for i, lbl in enumerate(props.get("label", []))
    }

    # Compute and record the features one object at a time, so every row that
    # is written belongs to the object it was computed from.
    for label in labels:
        label_id = int(label)
        if label_id == 0:
            continue
        bbox = label_to_bbox.get(label_id)
        if bbox is None:
            continue
        min_z, min_y, min_x, max_z, max_y, max_x = bbox
        # Crop to the object's bounding box (skimage bboxes are half-open)
        region = (
            slice(min_z, max_z),
            slice(min_y, max_y),
            slice(min_x, max_x),
        )
        # Copy only the crop so the loader's image is never modified.
        image_object = object_loader.image[region].copy()
        object_mask = label_object[region] == label
        if not numpy.any(object_mask):
            continue
        # Zero everything that is not this object; haralick is called with
        # ignore_zeros=True so those pixels do not contribute.
        image_object[~object_mask] = 0
        image_object = scale_image(image_object, num_gray_levels=grayscale)

        try:
            # (direction, feature) array of the 13 Haralick features
            features = mahotas.features.haralick(
                ignore_zeros=True,
                f=image_object,
                distance=distance,
                compute_14th_feature=False,
            )
        except ValueError:
            # Keep the same (direction, feature) layout so the loop below
            # records NaN for every measurement of this object.
            features = numpy.full(
                (n_directions, len(feature_names)), numpy.nan, dtype=float
            )

        for direction, direction_features in enumerate(features):
            direction_str = f"{direction:02d}"
            for feature_name, feature_value in zip(feature_names, direction_features):
                output_texture_dict["Metadata_Object_ObjectID"].append(label)
                output_texture_dict["texture_name"].append(
                    f"{feature_name}-{distance}-{direction_str}-{grayscale}"
                )
                output_texture_dict["texture_value"].append(feature_value)

    # Long -> wide: one row per object, one column per texture measurement.
    final_df = pandas.DataFrame(output_texture_dict)
    final_df = final_df.pivot(
        index="Metadata_Object_ObjectID",
        columns="texture_name",
        values="texture_value",
    )
    final_df.reset_index(inplace=True)
    final_df.rename(
        columns={
            col: format_morphology_feature_name(
                compartment=object_loader.compartment,
                channel=object_loader.channel,
                feature_type="Texture",
                measurement=col,
            )
            if col != "Metadata_Object_ObjectID"
            else col
            for col in final_df.columns
        },
        inplace=True,
    )
    final_df.insert(
        0,
        "Metadata_Experiment_ImageSet",
        object_loader.image_set_loader.image_set_name,
    )
    final_df.columns.name = None

    # validate column names against schema
    for col in final_df.columns:
        try:
            validate_column_name_schema(
                column_name=col,
                compartments=[object_loader.compartment],
                channels=[f"{object_loader.channel}"],
            )
        except ValueError as e:
            raise ValueError(
                f"Column name {col} does not conform to schema: {e}"
            ) from e
    return final_df