Work with multiple samples (and coordinate systems)

Work with multiple samples (and coordinate systems)#

%load_ext autoreload
%autoreload 2

Load libraries and create an empty spatialdata object that is backed by a zarr store.

import os

import tempfile
import uuid

from spatialdata import SpatialData

import sparrow as sp

sdata = SpatialData()

default_tmp_path = tempfile.gettempdir()

zarr_path = os.path.join(default_tmp_path, f"sdata_{uuid.uuid4()}.zarr")

sdata.write(zarr_path)
INFO     The Zarr backing store has been changed from None the new file path:                                      
         /Users/arnedf/VIB/DATA/test_data/sdata_transformations.zarr

Add image layers.

from dask_image import imread
from spatialdata.transformations import Identity

from sparrow.datasets.registry import get_registry

registry = get_registry()
arr_a1_1 = imread.imread(registry.fetch("transcriptomics/resolve/mouse/20272_slide1_A1-1_DAPI.tiff"))
arr_a1_2 = imread.imread(registry.fetch("transcriptomics/resolve/mouse/20272_slide1_A1-2_DAPI.tiff"))

sdata = sp.im.add_image_layer(
    sdata,
    arr=arr_a1_1,
    output_layer="image_a1_1",
    transformations={"a1_1": Identity()},
    overwrite=True,
)

sdata = sp.im.add_image_layer(
    sdata,
    arr=arr_a1_2,
    output_layer="image_a1_2",
    transformations={"a1_2": Identity()},
    overwrite=True,
)

Specify coordinate system when plotting

sp.pl.plot_image(
    sdata, img_layer="image_a1_1", to_coordinate_system="a1_1", crd=[500, 2000, 2200, 3700], figsize=(3, 3)
)
sp.pl.plot_image(
    sdata, img_layer="image_a1_2", to_coordinate_system="a1_2", crd=[500, 2000, 2200, 3700], figsize=(3, 3)
)
../../_images/8cbd2c84067b300780345e2b6d3a556921a1e9ead827dcf4ef6c24112f88464a.png ../../_images/08a1cfc54652216a6a01f1ee4368d4322f1ac3b08f559f3d93b96fc697669c3e.png

Do some image processing on full image, no need to specify a coordinate system

sdata = sp.im.min_max_filtering(
    sdata, img_layer="image_a1_1", output_layer="image_a1_1_min_max", size_min_max_filter=40, overwrite=True
)

When doing some image processing on a crop, you need to specify the coordinate system to which the crop is defined

sdata = sp.im.min_max_filtering(
    sdata,
    img_layer="image_a1_1",
    output_layer="image_a1_1_min_max",
    size_min_max_filter=40,
    crd=[500, 2000, 2200, 3700],
    to_coordinate_system="a1_1",
    overwrite=True,
)
sdata = sp.im.min_max_filtering(
    sdata,
    img_layer="image_a1_2",
    output_layer="image_a1_2_min_max",
    size_min_max_filter=40,
    crd=[500, 2000, 2200, 3700],
    to_coordinate_system="a1_2",
    overwrite=True,
)
sp.pl.plot_image(
    sdata,
    img_layer="image_a1_1_min_max",
    to_coordinate_system="a1_1",
    figsize=(3, 3),
)
../../_images/5731bb300876009cf85c852f9f7d3ec3ea5c75db13d4ac7f4bf0c23f4d04d6ea.png

Now do segmentation

# no need to specify coordinate system if no crop is specifed
sdata = sp.im.segment(
    sdata,
    img_layer="image_a1_1_min_max",
    output_labels_layer="labels_a1_1",
    output_shapes_layer="shapes_a1_1",
    overwrite=True,
    crd=None,
)

# but need to specify to_coordinate_system when crop is defined

sdata = sp.im.segment(
    sdata,
    img_layer="image_a1_2_min_max",
    output_labels_layer="labels_a1_2",
    output_shapes_layer="shapes_a1_2",
    overwrite=True,
    crd=[600, 2000, 2200, 3700],
    to_coordinate_system="a1_2",
)
sp.pl.plot_shapes(
    sdata,
    img_layer="image_a1_2_min_max",
    shapes_layer="shapes_a1_2",
    to_coordinate_system="a1_2",
    crd=[600, 2000, 2200, 3700],
    figsize=(3, 3),
)
../../_images/aea72d5e8c3b9642685c80e61e1cc55978cae214b30c934316ecfecceb29fa02.png

Read the transcripts downloaded previously. Currently, the transformation specified on the transcripts is always the Identity transformation. The transcripts should be registered with the coordinate system of the image layer via an affine transform matrix that can be specified via the parameter path_transform_matrix. If not specified, the identity matrix will be used.

path_points_a1_1 = registry.fetch("transcriptomics/resolve/mouse/20272_slide1_A1-1_results.txt")
path_points_a1_2 = registry.fetch("transcriptomics/resolve/mouse/20272_slide1_A1-2_results.txt")

kwargs = {
    "column_x": 0,
    "column_y": 1,
    "column_gene": 3,
    "delimiter": "\t",
    "header": None,
    "overwrite": True,
}

sdata = sp.io.read_transcripts(
    sdata, path_count_matrix=path_points_a1_1, to_coordinate_system="a1_1", output_layer="points_a1_1", **kwargs
)

sdata = sp.io.read_transcripts(
    sdata, path_count_matrix=path_points_a1_2, to_coordinate_system="a1_2", output_layer="points_a1_2", **kwargs
)

Verify that transformation associated to points layer is indeed the identity, and verify that shapes and labels have same transformation associated to them.

from spatialdata.transformations import get_transformation

print("points:\n", get_transformation(sdata["points_a1_2"], get_all=True))
print(
    "image\n", get_transformation(sdata["image_a1_2_min_max"], get_all=True)
)  # one crop was taken, so one translation is defined on image.
print(
    "labels:\n", get_transformation(sdata["labels_a1_2"], get_all=True)
)  # two times a crop was taken, so a sequence of two translations is defined on labels/shapes.
print(
    "shapes:\n", get_transformation(sdata["shapes_a1_2"], get_all=True)
)  # same transformation defined on associated shapes layer.
points:
 {'a1_2': Identity }
image
 {'a1_2': Sequence 
    Translation (c, y, x)
        [   0. 2200.  500.]
    Identity }
labels:
 {'a1_2': Sequence 
    Translation (y, x)
        [  0. 100.]
    Sequence 
        Translation (y, x)
            [2200.  500.]
        Identity }
shapes:
 {'a1_2': Sequence 
    Translation (x, y)
        [100.   0.]
    Sequence 
        Translation (x, y)
            [ 500. 2200.]
        Identity }
sp.pl.sanity_plot_transcripts_matrix(
    sdata,
    img_layer="image_a1_1_min_max",
    shapes_layer="shapes_a1_1",
    points_layer="points_a1_1",
    to_coordinate_system="a1_1",
    figsize=(5, 5),
    n_sample=10000,
)
../../_images/e7c00f3f8cf730f4e435f5357fc02df9b0b682bc9f6b2a92b88a0413c78a7520.png
sp.pl.sanity_plot_transcripts_matrix(
    sdata,
    img_layer="image_a1_2_min_max",
    shapes_layer="shapes_a1_2",
    points_layer="points_a1_2",
    to_coordinate_system="a1_2",
    figsize=(5, 5),
    n_sample=10000,
)
../../_images/5b0e1a31f756e206104d5e3f165f73a5e9c79d17674e6ebb4526506f3bc80dd2.png
sdata = sp.tb.allocate(
    sdata,
    labels_layer="labels_a1_1",
    points_layer="points_a1_1",
    to_coordinate_system="a1_1",
    output_layer="table",
    append=False,
    overwrite=True,
)

# append gene count of labels_a1_2 and points_a1_2 to anndata object with name 'table'
sdata = sp.tb.allocate(
    sdata,
    labels_layer="labels_a1_2",
    points_layer="points_a1_2",
    to_coordinate_system="a1_2",
    output_layer="table",
    append=True,
    overwrite=True,
)
sdata = sp.tb.preprocess_transcriptomics(
    sdata,
    labels_layer=["labels_a1_1", "labels_a1_2"],
    table_layer="table",
    output_layer="table_preprocessed",
    overwrite=True,
)  # we can also choose to set output_layer equal to 'table'.
sdata = sp.tb.filter_on_size(
    sdata,
    labels_layer=["labels_a1_1", "labels_a1_2"],
    table_layer="table_preprocessed",
    output_layer="table_filtered",
    min_size=1000,
    overwrite=True,
)
sdata
SpatialData object, with associated Zarr store: /Users/arnedf/VIB/DATA/test_data/sdata_transformations.zarr
├── Images
│     ├── 'image_a1_1': DataArray[cyx] (1, 12864, 10720)
│     ├── 'image_a1_1_min_max': DataArray[cyx] (1, 1500, 1500)
│     ├── 'image_a1_2': DataArray[cyx] (1, 10720, 8576)
│     └── 'image_a1_2_min_max': DataArray[cyx] (1, 1500, 1500)
├── Labels
│     ├── 'labels_a1_1': DataArray[yx] (1500, 1500)
│     └── 'labels_a1_2': DataArray[yx] (1500, 1400)
├── Points
│     ├── 'points_a1_1': DataFrame with shape: (<Delayed>, 3) (2D points)
│     └── 'points_a1_2': DataFrame with shape: (<Delayed>, 3) (2D points)
├── Shapes
│     ├── 'filtered_low_counts_shapes_a1_1': GeoDataFrame shape: (3, 1) (2D shapes)
│     ├── 'filtered_low_counts_shapes_a1_2': GeoDataFrame shape: (1, 1) (2D shapes)
│     ├── 'filtered_size_shapes_a1_1': GeoDataFrame shape: (28, 1) (2D shapes)
│     ├── 'filtered_size_shapes_a1_2': GeoDataFrame shape: (16, 1) (2D shapes)
│     ├── 'shapes_a1_1': GeoDataFrame shape: (134, 1) (2D shapes)
│     └── 'shapes_a1_2': GeoDataFrame shape: (164, 1) (2D shapes)
└── Tables
      ├── 'table': AnnData (346, 86)
      ├── 'table_filtered': AnnData (298, 61)
      └── 'table_preprocessed': AnnData (342, 61)
with coordinate systems:
    ▸ 'a1_1', with elements:
        image_a1_1 (Images), image_a1_1_min_max (Images), labels_a1_1 (Labels), points_a1_1 (Points), filtered_low_counts_shapes_a1_1 (Shapes), filtered_size_shapes_a1_1 (Shapes), shapes_a1_1 (Shapes)
    ▸ 'a1_2', with elements:
        image_a1_2 (Images), image_a1_2_min_max (Images), labels_a1_2 (Labels), points_a1_2 (Points), filtered_low_counts_shapes_a1_2 (Shapes), filtered_size_shapes_a1_2 (Shapes), shapes_a1_2 (Shapes)
assert sdata["table_filtered"].shape[0] == len(sdata["shapes_a1_1"]) + len(sdata["shapes_a1_2"])
from sparrow.utils._keys import _REGION_KEY

for to_coordinate_system in ["a1_1", "a1_2"]:
    assert sdata["table"][sdata["table"].obs[_REGION_KEY] == f"labels_{to_coordinate_system}"].shape[0] == len(
        sdata[f"shapes_{to_coordinate_system}"]
    ) + len(sdata[f"filtered_low_counts_shapes_{to_coordinate_system}"]) + len(
        sdata[f"filtered_size_shapes_{to_coordinate_system}"]
    )
zarr_path = os.path.join(default_tmp_path, f"sdata_{uuid.uuid4()}.zarr")  # output path for queried sdata

labels_layer = [
    "labels_a1_1",
    "labels_a1_2",
]
crd = [
    (600, 1000, 2300, 3000),
    (700, 1000, 2300, 2500),
]
to_coordinate_system = ["a1_1", "a1_2"]

sdata_queried = sp.utils.bounding_box_query(
    sdata,
    labels_layer=labels_layer,
    crd=crd,
    to_coordinate_system=to_coordinate_system,
    copy_img_layer=False,
    copy_shapes_layer=False,
    copy_points_layer=False,
    output=zarr_path,
)

for _labels_layer in labels_layer:
    assert _labels_layer in sdata_queried.labels