Skip to content

TESSERA module

TESSERA module for accessing geospatial foundation model embeddings.

This module provides tools for working with TESSERA (Temporal Embeddings of Surface Spectra for Earth Representation and Analysis) embeddings via the GeoTessera library. TESSERA is a foundation model developed at the University of Cambridge that processes time-series Sentinel-1 and Sentinel-2 satellite imagery to generate 128-channel representation maps at 10m resolution globally.

Reference

Feng et al., "TESSERA: Temporal Embeddings of Surface Spectra for Earth Representation and Analysis," ArXiv preprint, 2025. https://arxiv.org/abs/2506.20380

Repository: https://github.com/ucam-eo/tessera

GeoTessera library: https://github.com/ucam-eo/geotessera

tessera_available_years(dataset_version='v1', **kwargs)

Get list of years with available TESSERA embeddings.

Parameters:

Name Type Description Default
dataset_version str

TESSERA dataset version. Defaults to "v1".

'v1'
**kwargs

Additional keyword arguments passed to GeoTessera constructor.

{}

Returns:

Type Description
List[int]

List of available years sorted in ascending order.

Example

import geoai
years = geoai.tessera_available_years()
print(f"Available years: {years}")

Source code in geoai/tessera.py
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
def tessera_available_years(
    dataset_version: str = "v1",
    **kwargs,
) -> List[int]:
    """Get list of years with available TESSERA embeddings.

    Args:
        dataset_version: TESSERA dataset version. Defaults to "v1".
        **kwargs: Additional keyword arguments passed to GeoTessera constructor.

    Returns:
        List of available years sorted in ascending order.

    Raises:
        ImportError: If geotessera package is not installed.

    Example:
        >>> import geoai
        >>> years = geoai.tessera_available_years()
        >>> print(f"Available years: {years}")
    """
    _check_geotessera()
    from geotessera import GeoTessera

    gt = GeoTessera(dataset_version=dataset_version, **kwargs)
    # Sort here so the documented ascending order holds no matter in which
    # order (or container type) the registry reports the years.
    return sorted(gt.registry.get_available_years())

tessera_coverage(year=None, output_path='tessera_coverage.png', region_bbox=None, region_file=None, tile_color='red', tile_alpha=0.6, width_pixels=2000, show_countries=True, dataset_version='v1', **kwargs)

Generate a coverage map showing TESSERA data availability.

Creates a PNG map showing which tiles have embeddings available for the specified year and region. This is the recommended first step before downloading data to verify availability.

Parameters:

Name Type Description Default
year Optional[int]

Specific year to visualize coverage for. If None, shows all available years with color coding. Defaults to None.

None
output_path str

Path for the output PNG file. Defaults to "tessera_coverage.png".

'tessera_coverage.png'
region_bbox Optional[Tuple[float, float, float, float]]

Optional bounding box (min_lon, min_lat, max_lon, max_lat) to focus on a specific region.

None
region_file Optional[str]

Optional path to a GeoJSON/Shapefile to focus on a region.

None
tile_color str

Color for tile rectangles. Defaults to "red".

'red'
tile_alpha float

Transparency of tile rectangles (0-1). Defaults to 0.6.

0.6
width_pixels int

Width of output image in pixels. Defaults to 2000.

2000
show_countries bool

Whether to show country boundaries. Defaults to True.

True
dataset_version str

TESSERA dataset version. Defaults to "v1".

'v1'
**kwargs

Additional keyword arguments passed to GeoTessera constructor.

{}

Returns:

Type Description
str

Path to the created coverage map PNG file.

Example

import geoai

Check global coverage for 2024

geoai.tessera_coverage(year=2024)

Check coverage for a specific region

geoai.tessera_coverage( ... year=2024, ... region_bbox=(-10, 35, 40, 60), ... output_path="europe_coverage.png" ... )

Source code in geoai/tessera.py
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
def tessera_coverage(
    year: Optional[int] = None,
    output_path: str = "tessera_coverage.png",
    region_bbox: Optional[Tuple[float, float, float, float]] = None,
    region_file: Optional[str] = None,
    tile_color: str = "red",
    tile_alpha: float = 0.6,
    width_pixels: int = 2000,
    show_countries: bool = True,
    dataset_version: str = "v1",
    **kwargs,
) -> str:
    """Render a PNG map of TESSERA tile availability.

    Builds a GeoTessera client and delegates the drawing to
    ``geotessera.visualization.visualize_global_coverage``. Checking the
    coverage map is the recommended first step before downloading data.

    Args:
        year: Year whose coverage should be drawn. If None, all available
            years are shown with color coding. Defaults to None.
        output_path: Destination of the PNG file. Defaults to
            "tessera_coverage.png".
        region_bbox: Optional (min_lon, min_lat, max_lon, max_lat) bounding
            box restricting the map to a region.
        region_file: Optional path to a GeoJSON/Shapefile restricting the map
            to a region.
        tile_color: Color of the tile rectangles. Defaults to "red".
        tile_alpha: Transparency of the tile rectangles (0-1). Defaults to 0.6.
        width_pixels: Width of the output image in pixels. Defaults to 2000.
        show_countries: Whether country boundaries are drawn. Defaults to True.
        dataset_version: TESSERA dataset version. Defaults to "v1".
        **kwargs: Additional keyword arguments passed to GeoTessera constructor.

    Returns:
        Path to the created coverage map PNG file, as reported by the
        visualization helper.

    Example:
        >>> import geoai
        >>> # Check global coverage for 2024
        >>> geoai.tessera_coverage(year=2024)
        >>> # Check coverage for a specific region
        >>> geoai.tessera_coverage(
        ...     year=2024,
        ...     region_bbox=(-10, 35, 40, 60),
        ...     output_path="europe_coverage.png"
        ... )
    """
    _check_geotessera()
    from geotessera import GeoTessera
    from geotessera.visualization import visualize_global_coverage

    client = GeoTessera(dataset_version=dataset_version, **kwargs)

    # Everything except the client and the destination is plain styling or
    # region selection that is forwarded to the renderer untouched.
    render_options = {
        "year": year,
        "width_pixels": width_pixels,
        "show_countries": show_countries,
        "tile_color": tile_color,
        "tile_alpha": tile_alpha,
        "region_bbox": region_bbox,
        "region_file": region_file,
    }
    saved_to = visualize_global_coverage(
        tessera_client=client,
        output_path=output_path,
        **render_options,
    )

    logger.info(f"Coverage map saved to {saved_to}")
    return saved_to

tessera_download(bbox=None, lon=None, lat=None, year=2024, output_dir='./tessera_output', output_format='tiff', bands=None, compress='lzw', region_file=None, dataset_version='v1', **kwargs)

Download TESSERA embeddings for a geographic region.

Downloads pre-computed TESSERA foundation model embeddings at 10m resolution. Embeddings are 128-channel representations that compress a full year of Sentinel-1 and Sentinel-2 temporal-spectral features.

Parameters:

Name Type Description Default
bbox Optional[Tuple[float, float, float, float]]

Bounding box as (min_lon, min_lat, max_lon, max_lat). Either bbox or lon/lat must be provided.

None
lon Optional[float]

Longitude for a single tile download. Used with lat.

None
lat Optional[float]

Latitude for a single tile download. Used with lon.

None
year int

Year of embeddings to download. Available years: 2017-2024. Defaults to 2024.

2024
output_dir str

Directory to save downloaded files. Defaults to "./tessera_output".

'./tessera_output'
output_format str

Output format, either "tiff" (georeferenced GeoTIFF) or "npy" (raw numpy arrays with metadata JSON). Defaults to "tiff".

'tiff'
bands Optional[List[int]]

List of specific band indices to download (0-127). If None, all 128 bands are downloaded. Defaults to None.

None
compress str

Compression method for GeoTIFF output. Options: "lzw", "deflate", "zstd", "none". Defaults to "lzw".

'lzw'
region_file Optional[str]

Path to a GeoJSON or Shapefile to define the download region. If provided, overrides bbox.

None
dataset_version str

TESSERA dataset version. Defaults to "v1".

'v1'
**kwargs

Additional keyword arguments passed to GeoTessera constructor.

{}

Returns:

Type Description
List[str]

List of file paths for downloaded files.

Raises:

Type Description
ImportError

If geotessera package is not installed.

ValueError

If neither bbox, lon/lat, nor region_file is provided.

Example

import geoai

Download embeddings for a bounding box

files = geoai.tessera_download( ... bbox=(-0.2, 51.4, 0.1, 51.6), ... year=2024, ... output_dir="./london_embeddings" ... )

Download a single tile

files = geoai.tessera_download( ... lon=0.15, lat=52.05, ... year=2024, ... output_dir="./cambridge_tile" ... )

Download specific bands only

files = geoai.tessera_download( ... bbox=(-0.2, 51.4, 0.1, 51.6), ... bands=[0, 1, 2], ... output_dir="./london_rgb" ... )

Source code in geoai/tessera.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
def tessera_download(
    bbox: Optional[Tuple[float, float, float, float]] = None,
    lon: Optional[float] = None,
    lat: Optional[float] = None,
    year: int = 2024,
    output_dir: str = "./tessera_output",
    output_format: str = "tiff",
    bands: Optional[List[int]] = None,
    compress: str = "lzw",
    region_file: Optional[str] = None,
    dataset_version: str = "v1",
    **kwargs,
) -> List[str]:
    """Download TESSERA embeddings for a geographic region.

    Downloads pre-computed TESSERA foundation model embeddings at 10m resolution.
    Embeddings are 128-channel representations that compress a full year of
    Sentinel-1 and Sentinel-2 temporal-spectral features.

    The region is resolved with the precedence ``region_file`` > ``bbox`` >
    ``lon``/``lat``. Arguments are validated before any client is created,
    directory is made, or registry is queried, so invalid calls fail fast and
    leave no side effects behind.

    Args:
        bbox: Bounding box as (min_lon, min_lat, max_lon, max_lat). Either bbox
            or lon/lat must be provided.
        lon: Longitude for a single tile download. Used with lat.
        lat: Latitude for a single tile download. Used with lon.
        year: Year of embeddings to download. Available years: 2017-2024.
            Defaults to 2024.
        output_dir: Directory to save downloaded files. It is created if it
            does not exist. Defaults to "./tessera_output".
        output_format: Output format, either "tiff" (georeferenced GeoTIFF)
            or "npy" (raw numpy arrays with metadata JSON). Matched
            case-insensitively. Defaults to "tiff".
        bands: List of specific band indices to download (0-127). If None,
            all 128 bands are downloaded. Defaults to None.
        compress: Compression method for GeoTIFF output. Options: "lzw",
            "deflate", "zstd", "none". Defaults to "lzw".
        region_file: Path to a GeoJSON or Shapefile to define the download
            region. Only its total bounds are used. If provided, overrides
            bbox.
        dataset_version: TESSERA dataset version. Defaults to "v1".
        **kwargs: Additional keyword arguments passed to GeoTessera constructor.

    Returns:
        List of file paths for downloaded files. For "npy" output the last
        entry is the ``metadata.json`` file. An empty list is returned when
        no tiles are available for the region and year.

    Raises:
        ImportError: If geotessera package is not installed.
        ValueError: If neither bbox, lon/lat, nor region_file is provided, or
            if output_format is not "tiff" or "npy".

    Example:
        >>> import geoai
        >>> # Download embeddings for a bounding box
        >>> files = geoai.tessera_download(
        ...     bbox=(-0.2, 51.4, 0.1, 51.6),
        ...     year=2024,
        ...     output_dir="./london_embeddings"
        ... )
        >>> # Download a single tile
        >>> files = geoai.tessera_download(
        ...     lon=0.15, lat=52.05,
        ...     year=2024,
        ...     output_dir="./cambridge_tile"
        ... )
        >>> # Download specific bands only
        >>> files = geoai.tessera_download(
        ...     bbox=(-0.2, 51.4, 0.1, 51.6),
        ...     bands=[0, 1, 2],
        ...     output_dir="./london_rgb"
        ... )
    """
    # Cheap, purely local argument checks come first: a typo in the format or
    # a missing region must not cost a registry lookup or create directories.
    fmt = output_format.lower()
    if fmt not in ("tiff", "npy"):
        raise ValueError(f"Unsupported format: {output_format}. Use 'tiff' or 'npy'.")
    if region_file is None and bbox is None and (lon is None or lat is None):
        raise ValueError("Must provide one of: bbox, lon/lat pair, or region_file")

    _check_geotessera()
    from geotessera import GeoTessera

    gt = GeoTessera(dataset_version=dataset_version, **kwargs)

    # Resolve the region to a bounding box.
    if region_file is not None:
        import geopandas as gpd

        bounds = gpd.read_file(region_file).total_bounds  # (minx, miny, maxx, maxy)
        bbox = (bounds[0], bounds[1], bounds[2], bounds[3])
    elif bbox is None:
        # Single tile mode - create a small bbox around the point
        bbox = (lon - 0.05, lat - 0.05, lon + 0.05, lat + 0.05)

    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    tiles_to_fetch = gt.registry.load_blocks_for_region(bounds=bbox, year=year)
    num_tiles = len(tiles_to_fetch)
    logger.info(f"Found {num_tiles} tiles to download for year {year}")

    if num_tiles == 0:
        logger.warning(
            f"No tiles available for the specified region and year {year}. "
            "Use tessera_coverage() to check data availability."
        )
        return []

    if fmt == "tiff":
        files = gt.export_embedding_geotiffs(
            tiles_to_fetch=tiles_to_fetch,
            output_dir=str(output_path),
            bands=bands,
            compress=compress,
        )
        created_files = [str(f) for f in files]
        logger.info(f"Exported {len(created_files)} GeoTIFF files to {output_dir}")
        return created_files

    # fmt == "npy": one array per tile plus a JSON sidecar describing them.
    import json

    created_files = []
    metadata = {
        "year": year,
        "bbox": list(bbox),
        "bands": bands,
        "version": dataset_version,
        "tiles": [],
    }
    for yr, tile_lon, tile_lat, embedding, crs, transform in gt.fetch_embeddings(
        tiles_to_fetch
    ):
        if bands is not None:
            embedding = embedding[:, :, bands]

        filename = f"grid_{tile_lon:.2f}_{tile_lat:.2f}_{yr}.npy"
        filepath = output_path / filename
        np.save(str(filepath), embedding)
        created_files.append(str(filepath))

        metadata["tiles"].append(
            {
                "file": filename,
                "lon": tile_lon,
                "lat": tile_lat,
                "year": yr,
                "shape": list(embedding.shape),
                "crs": str(crs) if crs else None,
            }
        )

    # Save metadata; the encoding is pinned so the file does not depend on the
    # platform's default locale.
    meta_path = output_path / "metadata.json"
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)
    created_files.append(str(meta_path))
    # The metadata file is excluded from the reported array count.
    logger.info(f"Saved {len(created_files) - 1} numpy arrays to {output_dir}")

    return created_files

tessera_fetch_embeddings(bbox, year=2024, bands=None, dataset_version='v1', **kwargs)

Fetch TESSERA embeddings as numpy arrays without saving to disk.

This function retrieves embeddings directly into memory, useful for immediate analysis without file I/O overhead.

Parameters:

Name Type Description Default
bbox Tuple[float, float, float, float]

Bounding box as (min_lon, min_lat, max_lon, max_lat).

required
year int

Year of embeddings. Defaults to 2024.

2024
bands Optional[List[int]]

List of specific band indices to extract (0-127). If None, all 128 bands are returned. Defaults to None.

None
dataset_version str

TESSERA dataset version. Defaults to "v1".

'v1'
**kwargs

Additional keyword arguments passed to GeoTessera constructor.

{}

Returns:

Type Description
list

List of dictionaries, each containing:

- "embedding": numpy array of shape (H, W, C)
- "lon": tile center longitude
- "lat": tile center latitude
- "year": tile year
- "crs": coordinate reference system
- "transform": affine transform

Example

import geoai
tiles = geoai.tessera_fetch_embeddings(
    bbox=(-0.2, 51.4, 0.1, 51.6),
    year=2024
)
for tile in tiles:
    print(f"Tile ({tile['lon']}, {tile['lat']}): {tile['embedding'].shape}")

Source code in geoai/tessera.py
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
def tessera_fetch_embeddings(
    bbox: Tuple[float, float, float, float],
    year: int = 2024,
    bands: Optional[List[int]] = None,
    dataset_version: str = "v1",
    **kwargs,
) -> list:
    """Load TESSERA embeddings for a region straight into memory.

    Nothing is written to disk by this function itself: every tile the
    registry reports for the bounding box is fetched and returned as a
    dictionary, which suits immediate in-process analysis.

    Args:
        bbox: Bounding box as (min_lon, min_lat, max_lon, max_lat).
        year: Year of embeddings. Defaults to 2024.
        bands: Band indices to keep (0-127), applied along the last array
            axis. If None, all bands are returned. Defaults to None.
        dataset_version: TESSERA dataset version. Defaults to "v1".
        **kwargs: Additional keyword arguments passed to GeoTessera constructor.

    Returns:
        List of dictionaries, one per tile, each containing:
            - "embedding": numpy array of shape (H, W, C)
            - "lon": tile center longitude
            - "lat": tile center latitude
            - "year": tile year
            - "crs": coordinate reference system
            - "transform": affine transform

    Example:
        >>> import geoai
        >>> tiles = geoai.tessera_fetch_embeddings(
        ...     bbox=(-0.2, 51.4, 0.1, 51.6),
        ...     year=2024
        ... )
        >>> for tile in tiles:
        ...     print(f"Tile ({tile['lon']}, {tile['lat']}): {tile['embedding'].shape}")
    """
    _check_geotessera()
    from geotessera import GeoTessera

    client = GeoTessera(dataset_version=dataset_version, **kwargs)
    blocks = client.registry.load_blocks_for_region(bounds=bbox, year=year)

    def _pick_bands(array):
        # Band selection happens on the channel (last) axis only.
        return array if bands is None else array[:, :, bands]

    return [
        {
            "embedding": _pick_bands(array),
            "lon": center_lon,
            "lat": center_lat,
            "year": tile_year,
            "crs": tile_crs,
            "transform": affine,
        }
        for (
            tile_year,
            center_lon,
            center_lat,
            array,
            tile_crs,
            affine,
        ) in client.fetch_embeddings(blocks)
    ]

tessera_sample_points(points, year=2024, embeddings_dir=None, auto_download=True, dataset_version='v1', **kwargs)

Sample TESSERA embeddings at specific point locations.

Extracts 128-dimensional embedding vectors at given geographic point locations. Useful for generating features for downstream tasks such as classification, regression, or clustering.

Parameters:

Name Type Description Default
points Union[str, GeoDataFrame]

GeoDataFrame with point geometries or path to a file (GeoJSON, Shapefile, etc.) containing point locations.

required
year int

Year of embeddings to sample. Defaults to 2024.

2024
embeddings_dir Optional[str]

Directory containing pre-downloaded embedding tiles. If None, uses current directory. Tiles are downloaded automatically if auto_download is True.

None
auto_download bool

Whether to automatically download missing tiles. Defaults to True.

True
dataset_version str

TESSERA dataset version. Defaults to "v1".

'v1'
**kwargs

Additional keyword arguments passed to GeoTessera constructor.

{}

Returns:

Type Description
GeoDataFrame

GeoDataFrame with the original columns plus 128 new columns (tessera_0 through tessera_127) containing embedding values.

Example

import geoai import geopandas as gpd from shapely.geometry import Point

Create sample points

points = gpd.GeoDataFrame( ... {"id": [1, 2]}, ... geometry=[Point(0.15, 52.05), Point(0.25, 52.15)], ... crs="EPSG:4326" ... )

Sample embeddings

result = geoai.tessera_sample_points(points, year=2024) print(result.columns.tolist())

Source code in geoai/tessera.py
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
def tessera_sample_points(
    points: Union[str, "gpd.GeoDataFrame"],
    year: int = 2024,
    embeddings_dir: Optional[str] = None,
    auto_download: bool = True,
    dataset_version: str = "v1",
    **kwargs,
) -> "gpd.GeoDataFrame":
    """Sample TESSERA embeddings at specific point locations.

    Extracts 128-dimensional embedding vectors at given geographic point
    locations. Useful for generating features for downstream tasks such
    as classification, regression, or clustering.

    All tiles the registry reports for the bounding box of the points are
    fetched into memory, so widely scattered points can be expensive.
    Points for which no loaded tile contains a pixel, or whose sampling
    fails, receive NaN in every embedding column (failures are logged).

    Args:
        points: GeoDataFrame with point geometries or path to a file
            (GeoJSON, Shapefile, etc.) containing point locations. Data in
            another CRS is reprojected to EPSG:4326; data without a CRS is
            used as-is.
        year: Year of embeddings to sample. Defaults to 2024.
        embeddings_dir: Directory containing pre-downloaded embedding tiles.
            Forwarded to the GeoTessera constructor only when not None.
        auto_download: Whether to automatically download missing tiles.
            Defaults to True. NOTE(review): this flag is currently not
            forwarded to GeoTessera or used anywhere in this function.
        dataset_version: TESSERA dataset version. Defaults to "v1".
        **kwargs: Additional keyword arguments passed to GeoTessera constructor.

    Returns:
        GeoDataFrame with the original columns plus one column per embedding
        channel (tessera_0 through tessera_127) containing embedding values.

    Raises:
        ImportError: If geotessera package is not installed.

    Example:
        >>> import geoai
        >>> import geopandas as gpd
        >>> from shapely.geometry import Point
        >>> # Create sample points
        >>> points = gpd.GeoDataFrame(
        ...     {"id": [1, 2]},
        ...     geometry=[Point(0.15, 52.05), Point(0.25, 52.15)],
        ...     crs="EPSG:4326"
        ... )
        >>> # Sample embeddings
        >>> result = geoai.tessera_sample_points(points, year=2024)
        >>> print(result.columns.tolist())
    """
    _check_geotessera()
    import math

    import geopandas as gpd
    import pandas as pd
    from geotessera import GeoTessera

    # Load points if path is provided
    if isinstance(points, (str, Path)):
        gdf = gpd.read_file(points)
    else:
        gdf = points.copy()

    # Ensure CRS is WGS84
    if gdf.crs is not None and gdf.crs.to_epsg() != 4326:
        gdf = gdf.to_crs(epsg=4326)

    gt_kwargs = dict(dataset_version=dataset_version, **kwargs)
    if embeddings_dir is not None:
        gt_kwargs["embeddings_dir"] = embeddings_dir

    gt = GeoTessera(**gt_kwargs)

    # Tiles keyed by their (lon, lat) centre, rounded to 2 decimals so that
    # float noise in the registry values cannot break the dictionary lookup.
    tile_data = {}
    if not gdf.empty:  # an empty frame has NaN bounds; skip the registry query
        bounds = gdf.total_bounds
        bbox = (bounds[0], bounds[1], bounds[2], bounds[3])
        tiles_to_fetch = gt.registry.load_blocks_for_region(bounds=bbox, year=year)
        for yr, tile_lon, tile_lat, embedding, crs, transform in gt.fetch_embeddings(
            tiles_to_fetch
        ):
            tile_data[(round(tile_lon, 2), round(tile_lat, 2))] = {
                "embedding": embedding,
                "crs": crs,
                "transform": transform,
            }

    def _tile_center(coord):
        """Centre of the 0.1-degree grid cell that contains ``coord``."""
        # floor (not round) picks the cell the coordinate lies in; rounding
        # would send the upper half of every cell to the neighbouring tile.
        return round(math.floor(coord * 10) / 10 + 0.05, 2)

    def _candidate_tiles(lon, lat):
        """Yield the expected tile first, then any other tile within 0.1 degree."""
        key = (_tile_center(lon), _tile_center(lat))
        if key in tile_data:
            yield tile_data[key]
        for other_key, other_tile in tile_data.items():
            if (
                other_key != key
                and abs(other_key[0] - lon) <= 0.1
                and abs(other_key[1] - lat) <= 0.1
            ):
                yield other_tile

    def _sample_tile(tile, lon, lat):
        """Return the embedding vector at (lon, lat), or None if outside the tile."""
        x, y = lon, lat
        if tile["crs"] and str(tile["crs"]) != "EPSG:4326":
            # rasterio is only needed when the tile is stored in a projected CRS.
            from rasterio.warp import transform as transform_coords

            xs, ys = transform_coords("EPSG:4326", tile["crs"], [lon], [lat])
            x, y = xs[0], ys[0]
        # The inverse affine transform maps world coordinates to (col, row).
        col, row = ~tile["transform"] * (x, y)
        # floor keeps coordinates in (-1, 0) negative; int() would truncate
        # them to 0 and accept points that lie just outside the tile.
        col, row = math.floor(col), math.floor(row)
        height, width = tile["embedding"].shape[:2]
        if 0 <= col < width and 0 <= row < height:
            return tile["embedding"][row, col, :].tolist()
        return None

    # Sample each point. Iterating the active geometry column (rather than
    # ``row.geometry``) also works when that column is not named "geometry".
    embedding_values = []
    for geom in gdf.geometry:
        point_lon = geom.x
        point_lat = geom.y
        values = None
        try:
            for tile in _candidate_tiles(point_lon, point_lat):
                values = _sample_tile(tile, point_lon, point_lat)
                if values is not None:
                    break
        except Exception:
            # Best effort: one bad point must not abort the whole batch.
            logger.exception("Error sampling point (%s, %s)", point_lon, point_lat)
            values = None
        if values is None:
            values = [np.nan] * TESSERA_EMBEDDING_DIM
        embedding_values.append(values)

    # Add embedding columns to the GeoDataFrame
    embedding_cols = [f"tessera_{i}" for i in range(TESSERA_EMBEDDING_DIM)]
    embedding_df = pd.DataFrame(embedding_values, columns=embedding_cols)
    embedding_df.index = gdf.index

    result = pd.concat([gdf, embedding_df], axis=1)
    result = gpd.GeoDataFrame(result, geometry=gdf.geometry.name, crs=gdf.crs)

    return result

tessera_tile_count(bbox, year=2024, dataset_version='v1', **kwargs)

Get the number of available TESSERA tiles in a bounding box.

Useful for estimating download size before fetching data.

Parameters:

Name Type Description Default
bbox Tuple[float, float, float, float]

Bounding box as (min_lon, min_lat, max_lon, max_lat).

required
year int

Year to check. Defaults to 2024.

2024
dataset_version str

TESSERA dataset version. Defaults to "v1".

'v1'
**kwargs

Additional keyword arguments passed to GeoTessera constructor.

{}

Returns:

Type Description
int

Number of available tiles.

Example

import geoai
count = geoai.tessera_tile_count(
    bbox=(-0.2, 51.4, 0.1, 51.6),
    year=2024
)
print(f"{count} tiles available")

Source code in geoai/tessera.py
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
def tessera_tile_count(
    bbox: Tuple[float, float, float, float],
    year: int = 2024,
    dataset_version: str = "v1",
    **kwargs,
) -> int:
    """Count the TESSERA tiles available inside a bounding box.

    Handy for gauging how much data a download would involve before
    actually fetching anything. The count is obtained from the GeoTessera
    client's ``embeddings_count`` method.

    Args:
        bbox: Bounding box as (min_lon, min_lat, max_lon, max_lat).
        year: Year to check. Defaults to 2024.
        dataset_version: TESSERA dataset version. Defaults to "v1".
        **kwargs: Additional keyword arguments passed to GeoTessera constructor.

    Returns:
        Number of available tiles.

    Example:
        >>> import geoai
        >>> count = geoai.tessera_tile_count(
        ...     bbox=(-0.2, 51.4, 0.1, 51.6),
        ...     year=2024
        ... )
        >>> print(f"{count} tiles available")
    """
    _check_geotessera()
    from geotessera import GeoTessera

    client = GeoTessera(dataset_version=dataset_version, **kwargs)
    tile_total = client.embeddings_count(bbox=bbox, year=year)
    return tile_total

tessera_visualize_rgb(geotiff_dir, bands=(0, 1, 2), output_path=None, normalize=True, figsize=(12, 8), title=None, **kwargs)

Visualize TESSERA embeddings as an RGB composite image.

Creates a false-color RGB visualization from three selected embedding bands. This helps with visual inspection and understanding of the embedding spatial patterns.

Parameters:

Name Type Description Default
geotiff_dir str

Directory containing TESSERA GeoTIFF files or path to a single GeoTIFF file.

required
bands Tuple[int, int, int]

Tuple of three band indices to use as (R, G, B). Defaults to (0, 1, 2).

(0, 1, 2)
output_path Optional[str]

Optional path to save the visualization. If None, displays with matplotlib. Defaults to None.

None
normalize bool

Whether to normalize band values to 0-1 range using percentile stretching. Defaults to True.

True
figsize Tuple[int, int]

Figure size as (width, height) in inches. Defaults to (12, 8).

(12, 8)
title Optional[str]

Optional title for the plot. Defaults to None.

None
**kwargs

Additional keyword arguments passed to matplotlib imshow.

{}

Returns:

Type Description
Optional[str]

Path to saved image if output_path is provided, otherwise None.

Example

import geoai

Download embeddings first

files = geoai.tessera_download( ... bbox=(-0.2, 51.4, 0.1, 51.6), ... output_dir="./london" ... )

Visualize with default bands

geoai.tessera_visualize_rgb("./london")

Use different band combination

geoai.tessera_visualize_rgb("./london", bands=(30, 60, 90))

Source code in geoai/tessera.py
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
def tessera_visualize_rgb(
    geotiff_dir: str,
    bands: Tuple[int, int, int] = (0, 1, 2),
    output_path: Optional[str] = None,
    normalize: bool = True,
    figsize: Tuple[int, int] = (12, 8),
    title: Optional[str] = None,
    **kwargs,
) -> Optional[str]:
    """Visualize TESSERA embeddings as an RGB composite image.

    Creates a false-color RGB visualization from three selected embedding
    bands. This helps with visual inspection and understanding of the
    embedding spatial patterns. When a directory holds several GeoTIFFs they
    are mosaicked with ``rasterio.merge.merge`` before plotting.

    Args:
        geotiff_dir: Directory containing TESSERA GeoTIFF files (``*.tif`` /
            ``*.tiff``) or path to a single GeoTIFF file.
        bands: Tuple of three zero-based band indices to use as (R, G, B).
            Defaults to (0, 1, 2).
        output_path: Optional path to save the visualization. If None,
            displays with matplotlib. Defaults to None.
        normalize: Whether to normalize band values to 0-1 range using a
            2nd-98th percentile stretch per band. Defaults to True.
        figsize: Figure size as (width, height) in inches. Defaults to
            (12, 8).
        title: Optional title for the plot. Defaults to None, in which case
            a title naming the selected bands is used.
        **kwargs: Additional keyword arguments passed to matplotlib imshow.

    Returns:
        Path to saved image if output_path is provided, otherwise None.

    Raises:
        ValueError: If bands does not contain exactly three non-negative
            indices.
        FileNotFoundError: If geotiff_dir does not exist or contains no
            GeoTIFF files.

    Example:
        >>> import geoai
        >>> # Download embeddings first
        >>> files = geoai.tessera_download(
        ...     bbox=(-0.2, 51.4, 0.1, 51.6),
        ...     output_dir="./london"
        ... )
        >>> # Visualize with default bands
        >>> geoai.tessera_visualize_rgb("./london")
        >>> # Use different band combination
        >>> geoai.tessera_visualize_rgb("./london", bands=(30, 60, 90))
    """
    # Validate the band selection before touching any file or heavy import:
    # two bands would fail late with an IndexError, and four bands would be
    # silently rendered by imshow as RGBA instead of RGB.
    bands = tuple(bands)
    if len(bands) != 3:
        raise ValueError(
            f"bands must contain exactly three band indices, got {len(bands)}"
        )
    if any(b < 0 for b in bands):
        raise ValueError(f"Band indices must be non-negative, got {bands}")

    import matplotlib.pyplot as plt
    import rasterio
    from rasterio.merge import merge

    geotiff_path = Path(geotiff_dir)

    # Collect GeoTIFF files
    if geotiff_path.is_file():
        tiff_files = [geotiff_path]
    elif geotiff_path.is_dir():
        tiff_files = sorted(geotiff_path.glob("*.tif")) + sorted(
            geotiff_path.glob("*.tiff")
        )
    else:
        raise FileNotFoundError(f"Path not found: {geotiff_dir}")

    if not tiff_files:
        raise FileNotFoundError(f"No GeoTIFF files found in {geotiff_dir}")

    # rasterio band numbers are 1-based, the public ``bands`` are 0-based.
    raster_indexes = [b + 1 for b in bands]

    # Read and merge if multiple files
    if len(tiff_files) == 1:
        with rasterio.open(tiff_files[0]) as src:
            rgb = np.stack([src.read(idx) for idx in raster_indexes], axis=-1).astype(
                np.float32
            )
    else:
        # Merge multiple tiles; every opened dataset is closed again even if
        # opening a later file or the merge itself fails.
        datasets = []
        try:
            for f in tiff_files:
                datasets.append(rasterio.open(f))
            merged, _ = merge(datasets, indexes=raster_indexes)
            rgb = np.moveaxis(merged, 0, -1).astype(np.float32)
        finally:
            for ds in datasets:
                ds.close()

    # Normalize for display
    if normalize:
        for i in range(3):
            band = rgb[:, :, i]
            # Ignore NaN and infinite pixels so they cannot skew the stretch.
            valid = band[np.isfinite(band)]
            if len(valid) > 0:
                vmin = np.percentile(valid, 2)
                vmax = np.percentile(valid, 98)
                if vmax > vmin:
                    rgb[:, :, i] = np.clip((band - vmin) / (vmax - vmin), 0, 1)
                else:
                    # Constant band: nothing to stretch, render it black.
                    rgb[:, :, i] = 0

    # Handle NaN values
    rgb = np.nan_to_num(rgb, nan=0.0)

    # Plot
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    ax.imshow(rgb, **kwargs)
    ax.set_axis_off()
    if title:
        ax.set_title(title, fontsize=14)
    else:
        ax.set_title(
            f"TESSERA Embedding RGB (bands {bands[0]}, {bands[1]}, {bands[2]})",
            fontsize=14,
        )

    plt.tight_layout()

    if output_path:
        try:
            fig.savefig(output_path, dpi=150, bbox_inches="tight")
        finally:
            # Release the figure even when saving fails.
            plt.close(fig)
        logger.info(f"Visualization saved to {output_path}")
        return output_path

    plt.show()
    return None