Skip to content

cloudmask module

cloudmask module

OmniCloudMask integration for cloud and cloud shadow detection in satellite imagery.

This module provides functions to use OmniCloudMask (https://github.com/DPIRD-DMA/OmniCloudMask) for detecting clouds and cloud shadows in satellite imagery. OmniCloudMask performs semantic segmentation to classify pixels into: Clear (0), Thick Cloud (1), Thin Cloud (2), Cloud Shadow (3).

Supports Sentinel-2, Landsat 8, PlanetScope, and Maxar imagery at 10-50m resolution.

calculate_cloud_statistics(mask)

Calculate statistics from a cloud mask.

Parameters:

Name Type Description Default
mask ndarray

Cloud mask array with values 0-3.

required

Returns:

Name Type Description
dict Dict[str, Any]

Statistics including:

- total_pixels: Total number of pixels
- clear_pixels: Number of clear pixels
- thick_cloud_pixels: Number of thick cloud pixels
- thin_cloud_pixels: Number of thin cloud pixels
- shadow_pixels: Number of cloud shadow pixels
- clear_percent: Percentage of clear pixels
- cloud_percent: Percentage of cloudy pixels (thick + thin)
- shadow_percent: Percentage of shadow pixels

Example

from geoai.tools.cloudmask import calculate_cloud_statistics
import numpy as np
mask = np.random.randint(0, 4, (512, 512))
stats = calculate_cloud_statistics(mask)
print(f"Clear: {stats['clear_percent']:.1f}%")

Source code in geoai/tools/cloudmask.py
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
def calculate_cloud_statistics(
    mask: np.ndarray,
) -> Dict[str, Any]:
    """
    Calculate statistics from a cloud mask.

    Pixels are counted per class by comparing the mask against the module
    class constants (CLEAR, THICK_CLOUD, THIN_CLOUD, CLOUD_SHADOW).
    Percentages are relative to the total number of pixels, so any pixel
    holding a value outside those classes lowers every percentage without
    being counted in any class.

    Args:
        mask (np.ndarray): Cloud mask array with values 0-3. Any array-like
            accepted by ``np.asarray`` works.

    Returns:
        dict: Statistics including:
            - total_pixels: Total number of pixels
            - clear_pixels: Number of clear pixels
            - thick_cloud_pixels: Number of thick cloud pixels
            - thin_cloud_pixels: Number of thin cloud pixels
            - shadow_pixels: Number of cloud shadow pixels
            - clear_percent: Percentage of clear pixels
            - cloud_percent: Percentage of cloudy pixels (thick + thin)
            - shadow_percent: Percentage of shadow pixels

            For an empty mask every count is 0 and every percentage is 0.0.

    Example:
        >>> from geoai.tools.cloudmask import calculate_cloud_statistics
        >>> import numpy as np
        >>> mask = np.random.randint(0, 4, (512, 512))
        >>> stats = calculate_cloud_statistics(mask)
        >>> print(f"Clear: {stats['clear_percent']:.1f}%")
    """
    mask = np.asarray(mask)
    total_pixels = int(mask.size)

    clear_pixels = int(np.count_nonzero(mask == CLEAR))
    thick_cloud_pixels = int(np.count_nonzero(mask == THICK_CLOUD))
    thin_cloud_pixels = int(np.count_nonzero(mask == THIN_CLOUD))
    shadow_pixels = int(np.count_nonzero(mask == CLOUD_SHADOW))

    cloud_pixels = thick_cloud_pixels + thin_cloud_pixels

    def _percent(count: int) -> float:
        # An empty mask would otherwise divide by zero and yield NaN.
        if total_pixels == 0:
            return 0.0
        return float(count / total_pixels * 100)

    return {
        "total_pixels": total_pixels,
        "clear_pixels": clear_pixels,
        "thick_cloud_pixels": thick_cloud_pixels,
        "thin_cloud_pixels": thin_cloud_pixels,
        "shadow_pixels": shadow_pixels,
        "clear_percent": _percent(clear_pixels),
        "cloud_percent": _percent(cloud_pixels),
        "shadow_percent": _percent(shadow_pixels),
    }

check_omnicloudmask_available()

Check if omnicloudmask is installed.

Raises:

Type Description
ImportError

If omnicloudmask is not installed.

Source code in geoai/tools/cloudmask.py
37
38
39
40
41
42
43
44
45
46
47
48
49
def check_omnicloudmask_available():
    """
    Verify that the optional omnicloudmask dependency can be used.

    Returns silently when the module-level ``OMNICLOUDMASK_AVAILABLE`` flag
    is truthy.

    Raises:
        ImportError: If omnicloudmask is not installed.
    """
    if OMNICLOUDMASK_AVAILABLE:
        return

    message = (
        "omnicloudmask is not installed. "
        "Please install it with: pip install omnicloudmask "
        "or: pip install geoai-py[extra]"
    )
    raise ImportError(message)

create_cloud_free_mask(mask, include_thin_clouds=False, include_shadows=False)

Create a binary mask of cloud-free pixels.

Parameters:

Name Type Description Default
mask ndarray

Cloud mask with values 0-3.

required
include_thin_clouds bool

If True, treats thin clouds as acceptable. Defaults to False.

False
include_shadows bool

If True, treats shadows as acceptable. Defaults to False.

False

Returns:

Type Description
ndarray

np.ndarray: Binary mask where 1 = usable, 0 = not usable.

Example

from geoai.tools.cloudmask import create_cloud_free_mask
import numpy as np
mask = np.random.randint(0, 4, (512, 512))
cloud_free = create_cloud_free_mask(mask)
print(f"Usable pixels: {cloud_free.sum()}")

Source code in geoai/tools/cloudmask.py
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
def create_cloud_free_mask(
    mask: np.ndarray,
    include_thin_clouds: bool = False,
    include_shadows: bool = False,
) -> np.ndarray:
    """
    Build a binary usability mask from a cloud classification mask.

    Clear pixels are always usable. Thin-cloud and cloud-shadow pixels are
    added to the usable set only when the corresponding flag is set; thick
    cloud is never usable.

    Args:
        mask (np.ndarray): Cloud mask with values 0-3.
        include_thin_clouds (bool): If True, treats thin clouds as acceptable.
            Defaults to False.
        include_shadows (bool): If True, treats shadows as acceptable.
            Defaults to False.

    Returns:
        np.ndarray: uint8 mask where 1 = usable, 0 = not usable.

    Example:
        >>> from geoai.tools.cloudmask import create_cloud_free_mask
        >>> import numpy as np
        >>> mask = np.random.randint(0, 4, (512, 512))
        >>> cloud_free = create_cloud_free_mask(mask)
        >>> print(f"Usable pixels: {cloud_free.sum()}")
    """
    # Collect every class value that counts as usable for this call.
    accepted_classes = [CLEAR]
    if include_thin_clouds:
        accepted_classes.append(THIN_CLOUD)
    if include_shadows:
        accepted_classes.append(CLOUD_SHADOW)

    # OR together one boolean comparison per accepted class.
    first_class, *other_classes = accepted_classes
    usable = mask == first_class
    for class_value in other_classes:
        usable = usable | (mask == class_value)

    return usable.astype(np.uint8)

predict_cloud_mask(image, batch_size=1, inference_device=None, inference_dtype='fp32', patch_size=1000, export_confidence=False, model_version=None)

Predict cloud mask from a numpy array using OmniCloudMask.

This function classifies each pixel into one of four categories:

- 0: Clear
- 1: Thick Cloud
- 2: Thin Cloud
- 3: Cloud Shadow

Parameters:

Name Type Description Default
image ndarray

Input image array with shape (3, height, width) or (height, width, 3). Should contain Red, Green, and NIR bands. Values should be in reflectance (0-1) or digital numbers (0-10000 typical for Sentinel-2/Landsat).

required
batch_size int

Number of patches to process per inference batch. Defaults to 1.

1
inference_device str

Device for inference ('cpu', 'cuda', or 'mps'). Defaults to None, which will use the device with the most available memory.

None
inference_dtype str

Data type for inference ('fp32', 'fp16', or 'bf16'). 'bf16' recommended for speed on compatible hardware. Defaults to 'fp32'.

'fp32'
patch_size int

Size of patches for processing large images. Defaults to 1000.

1000
export_confidence bool

If True, also returns confidence map. Defaults to False.

False
model_version int

Model version to use (1, 2, or 3). Defaults to None, which will use the latest version.

None

Returns:

Type Description
Union[ndarray, Tuple[ndarray, ndarray]]

Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: Cloud mask array with shape (height, width) containing class predictions. If export_confidence=True, returns a tuple of (mask, confidence).

Raises:

Type Description
ImportError

If omnicloudmask is not installed.

ValueError

If image has wrong shape or number of channels.

Example

import numpy as np
from geoai.tools.cloudmask import predict_cloud_mask

# Create synthetic image (3 bands: R, G, NIR)
image = np.random.rand(3, 512, 512) * 10000
mask = predict_cloud_mask(image)
print(f"Clear pixels: {(mask == 0).sum()}")

Source code in geoai/tools/cloudmask.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def predict_cloud_mask(
    image: np.ndarray,
    batch_size: int = 1,
    inference_device: Optional[str] = None,
    inference_dtype: str = "fp32",
    patch_size: int = 1000,
    export_confidence: bool = False,
    model_version: Optional[int] = None,
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """
    Run OmniCloudMask on an in-memory image and return the class mask.

    Each pixel is assigned one of four classes:
    - 0: Clear
    - 1: Thick Cloud
    - 2: Thin Cloud
    - 3: Cloud Shadow

    Args:
        image (np.ndarray): Input image array with shape (3, height, width) or
            (height, width, 3). Should contain Red, Green, and NIR bands. Values
            should be in reflectance (0-1) or digital numbers (0-10000 typical
            for Sentinel-2/Landsat). An array whose last axis has length 3 is
            transposed to channels-first unless its first axis is already 3.
        batch_size (int): Number of patches to process per inference batch.
            Defaults to 1.
        inference_device (str): Device for inference ('cpu', 'cuda', or 'mps').
            Defaults to None, which will use the device with the most available
            memory.
        inference_dtype (str): Data type for inference ('fp32', 'fp16', or
            'bf16'). 'bf16' recommended for speed on compatible hardware.
            Defaults to 'fp32'.
        patch_size (int): Size of patches for processing large images.
            Defaults to 1000.
        export_confidence (bool): If True, also returns confidence map.
            Defaults to False.
        model_version (int, optional): Model version to use (1, 2, or 3).
            Defaults to None, which will use the latest version.

    Returns:
        Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: Cloud mask array with
            shape (height, width) containing class predictions. If
            export_confidence=True, returns a tuple of (mask, confidence).

    Raises:
        ImportError: If omnicloudmask is not installed.
        ValueError: If image has wrong shape or number of channels.

    Example:
        >>> import numpy as np
        >>> from geoai.tools.cloudmask import predict_cloud_mask
        >>> # Create synthetic image (3 bands: R, G, NIR)
        >>> image = np.random.rand(3, 512, 512) * 10000
        >>> mask = predict_cloud_mask(image)
        >>> print(f"Clear pixels: {(mask == 0).sum()}")
    """
    check_omnicloudmask_available()

    if image.ndim != 3:
        raise ValueError(f"Image must be 3D, got shape {image.shape}")

    # Channels-last input is moved to channels-first; an array whose first
    # axis is already 3 is assumed to be channels-first and left untouched.
    first_axis, _, last_axis = image.shape
    if last_axis == 3 and first_axis != 3:
        image = np.transpose(image, (2, 0, 1))

    if image.shape[0] != 3:
        raise ValueError(
            f"Image must have 3 channels (R, G, NIR), got {image.shape[0]} channels"
        )

    def _drop_leading_axis(array):
        # 3D outputs are expected to carry a leading singleton axis (1, H, W);
        # 2D outputs are passed through unchanged.
        if array.ndim == 3:
            return array.squeeze(0)
        return array

    prediction = predict_from_array(
        image,
        batch_size=batch_size,
        inference_device=inference_device,
        inference_dtype=inference_dtype,
        patch_size=patch_size,
        export_confidence=export_confidence,
        model_version=model_version,
    )

    if not export_confidence:
        return _drop_leading_axis(prediction)

    # With confidence export the result is unpacked as a (mask, confidence) pair.
    mask, confidence = prediction
    mask = _drop_leading_axis(mask)
    confidence = _drop_leading_axis(confidence)
    return mask, confidence

predict_cloud_mask_batch(input_paths, output_dir, red_band=1, green_band=2, nir_band=3, batch_size=1, inference_device='cpu', inference_dtype='fp32', patch_size=1000, export_confidence=False, model_version=3, suffix='_cloudmask', verbose=True)

Predict cloud masks for multiple rasters in batch.

Processes multiple GeoTIFF files with the same cloud detection parameters and saves results to an output directory.

Parameters:

Name Type Description Default
input_paths list of str

Paths to input GeoTIFF files.

required
output_dir str

Directory to save cloud masks.

required
red_band int

Red band index. Defaults to 1.

1
green_band int

Green band index. Defaults to 2.

2
nir_band int

NIR band index. Defaults to 3.

3
batch_size int

Patches per batch. Defaults to 1.

1
inference_device str

Device. Defaults to 'cpu'.

'cpu'
inference_dtype str

Dtype. Defaults to 'fp32'.

'fp32'
patch_size int

Patch size. Defaults to 1000.

1000
export_confidence bool

Export confidence. Defaults to False.

False
model_version int

Model version. Defaults to 3.

3
suffix str

Suffix for output filenames. Defaults to '_cloudmask'.

'_cloudmask'
verbose bool

Print progress. Defaults to True.

True

Returns:

Type Description
List[str]

list of str: Paths to output cloud mask files.

Raises:

Type Description
ImportError

If omnicloudmask or rasterio not installed.

Example

from geoai.tools.cloudmask import predict_cloud_mask_batch
files = ["scene1.tif", "scene2.tif", "scene3.tif"]
outputs = predict_cloud_mask_batch(
    files,
    output_dir="cloud_masks",
    inference_device="cuda"
)

Source code in geoai/tools/cloudmask.py
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
def predict_cloud_mask_batch(
    input_paths: List[str],
    output_dir: str,
    red_band: int = 1,
    green_band: int = 2,
    nir_band: int = 3,
    batch_size: int = 1,
    inference_device: str = "cpu",
    inference_dtype: str = "fp32",
    patch_size: int = 1000,
    export_confidence: bool = False,
    model_version: int = 3,
    suffix: str = "_cloudmask",
    verbose: bool = True,
) -> List[str]:
    """
    Predict cloud masks for multiple rasters in batch.

    Processes multiple GeoTIFF files with the same cloud detection parameters
    and saves results to an output directory. Each output is named
    ``<input stem><suffix><input extension>`` inside ``output_dir``.

    Processing is best-effort: a file that fails is skipped and left out of
    the returned list. The failure is printed when ``verbose`` is True and
    reported through ``warnings.warn`` (RuntimeWarning) otherwise, so it is
    never dropped silently.

    Args:
        input_paths (list of str): Paths to input GeoTIFF files.
        output_dir (str): Directory to save cloud masks. Created if missing.
        red_band (int): Red band index. Defaults to 1.
        green_band (int): Green band index. Defaults to 2.
        nir_band (int): NIR band index. Defaults to 3.
        batch_size (int): Patches per batch. Defaults to 1.
        inference_device (str): Device. Defaults to 'cpu'.
        inference_dtype (str): Dtype. Defaults to 'fp32'.
        patch_size (int): Patch size. Defaults to 1000.
        export_confidence (bool): Export confidence. Defaults to False.
        model_version (int): Model version. Defaults to 3.
        suffix (str): Suffix for output filenames. Defaults to '_cloudmask'.
        verbose (bool): Print progress. Defaults to True.

    Returns:
        list of str: Paths to the cloud mask files that were written
            successfully, in input order.

    Raises:
        ImportError: If omnicloudmask is not installed.

    Example:
        >>> from geoai.tools.cloudmask import predict_cloud_mask_batch
        >>> files = ["scene1.tif", "scene2.tif", "scene3.tif"]
        >>> outputs = predict_cloud_mask_batch(
        ...     files,
        ...     output_dir="cloud_masks",
        ...     inference_device="cuda"
        ... )
    """
    import warnings

    check_omnicloudmask_available()

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    total = len(input_paths)
    output_paths = []

    for position, input_path in enumerate(input_paths, start=1):
        if verbose:
            print(f"Processing {position}/{total}: {input_path}")

        # Generate output filename: keep the input stem and extension,
        # inserting the suffix between them.
        name, ext = os.path.splitext(os.path.basename(input_path))
        output_path = os.path.join(output_dir, f"{name}{suffix}{ext}")

        try:
            predict_cloud_mask_from_raster(
                input_path,
                output_path,
                red_band=red_band,
                green_band=green_band,
                nir_band=nir_band,
                batch_size=batch_size,
                inference_device=inference_device,
                inference_dtype=inference_dtype,
                patch_size=patch_size,
                export_confidence=export_confidence,
                model_version=model_version,
            )
        except Exception as exc:
            # Deliberately broad: one bad file must not abort the whole batch.
            if verbose:
                print(f"  ✗ Failed: {exc}")
            else:
                warnings.warn(
                    f"Cloud mask prediction failed for {input_path}: {exc}",
                    RuntimeWarning,
                    stacklevel=2,
                )
            continue

        output_paths.append(output_path)

        if verbose:
            print(f"  ✓ Saved to: {output_path}")

    return output_paths

predict_cloud_mask_from_raster(input_path, output_path, red_band=1, green_band=2, nir_band=3, batch_size=1, inference_device=None, inference_dtype='fp32', patch_size=1000, export_confidence=False, model_version=None)

Predict cloud mask from a GeoTIFF file and save the result.

Reads a multi-band raster, extracts the Red, Green, and NIR bands, applies OmniCloudMask, and saves the result while preserving geospatial metadata.

Parameters:

Name Type Description Default
input_path str

Path to input GeoTIFF file.

required
output_path str

Path to save cloud mask GeoTIFF.

required
red_band int

Band index for Red (1-indexed). Defaults to 1.

1
green_band int

Band index for Green (1-indexed). Defaults to 2.

2
nir_band int

Band index for NIR (1-indexed). Defaults to 3.

3
batch_size int

Patches per inference batch. Defaults to 1.

1
inference_device str

Device ('cpu', 'cuda', 'mps'). Defaults to None, which will use the device with the most available memory.

None
inference_dtype str

Dtype ('fp32', 'fp16', 'bf16'). Defaults to 'fp32'.

'fp32'
patch_size int

Patch size for large images. Defaults to 1000.

1000
export_confidence bool

Export confidence map. Defaults to False.

False
model_version int

Model version (1, 2, or 3). Defaults to None, which will use the latest version.

None

Returns:

Name Type Description
None None

Writes cloud mask to output_path.

Raises:

Type Description
ImportError

If omnicloudmask or rasterio not installed.

FileNotFoundError

If input_path doesn't exist.

Example

from geoai.tools.cloudmask import predict_cloud_mask_from_raster
predict_cloud_mask_from_raster(
    "sentinel2_image.tif",
    "cloud_mask.tif",
    red_band=4,   # Sentinel-2 band order
    green_band=3,
    nir_band=8
)

Source code in geoai/tools/cloudmask.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
def predict_cloud_mask_from_raster(
    input_path: str,
    output_path: str,
    red_band: int = 1,
    green_band: int = 2,
    nir_band: int = 3,
    batch_size: int = 1,
    inference_device: Optional[str] = None,
    inference_dtype: str = "fp32",
    patch_size: int = 1000,
    export_confidence: bool = False,
    model_version: Optional[int] = None,
) -> None:
    """
    Predict cloud mask from a GeoTIFF file and save the result.

    Reads a multi-band raster, extracts the Red, Green, and NIR bands, applies
    OmniCloudMask, and saves the result as a single-band uint8 raster that
    reuses the input's raster profile (so georeferencing is carried over).

    Args:
        input_path (str): Path to input GeoTIFF file.
        output_path (str): Path to save cloud mask GeoTIFF. Missing parent
            directories are created.
        red_band (int): Band index for Red (1-indexed). Defaults to 1.
        green_band (int): Band index for Green (1-indexed). Defaults to 2.
        nir_band (int): Band index for NIR (1-indexed). Defaults to 3.
        batch_size (int): Patches per inference batch. Defaults to 1.
        inference_device (str, optional): Device ('cpu', 'cuda', 'mps').
            Defaults to None, which will use the device with the most available memory.
        inference_dtype (str): Dtype ('fp32', 'fp16', 'bf16'). Defaults to 'fp32'.
        patch_size (int): Patch size for large images. Defaults to 1000.
        export_confidence (bool): Export confidence map. When True, a float32
            raster is also written next to the mask as
            ``<output stem>_confidence<output extension>``. Defaults to False.
        model_version (int, optional): Model version (1, 2, or 3).
            Defaults to None, which will use the latest version.

    Returns:
        None: Writes cloud mask to output_path.

    Raises:
        ImportError: If omnicloudmask or rasterio not installed.
        FileNotFoundError: If input_path doesn't exist.

    Example:
        >>> from geoai.tools.cloudmask import predict_cloud_mask_from_raster
        >>> predict_cloud_mask_from_raster(
        ...     "sentinel2_image.tif",
        ...     "cloud_mask.tif",
        ...     red_band=4,   # Sentinel-2 band order
        ...     green_band=3,
        ...     nir_band=8
        ... )
    """
    check_omnicloudmask_available()

    if not RASTERIO_AVAILABLE:
        raise ImportError(
            "rasterio is required for raster operations. "
            "Please install it with: pip install rasterio"
        )

    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Input file not found: {input_path}")

    # Read the three required bands in one call; the result is already
    # stacked as (3, H, W) in Red, Green, NIR order.
    with rasterio.open(input_path) as src:
        image = src.read([red_band, green_band, nir_band]).astype(np.float32)
        profile = src.profile.copy()

    # Predict cloud mask
    result = predict_cloud_mask(
        image,
        batch_size=batch_size,
        inference_device=inference_device,
        inference_dtype=inference_dtype,
        patch_size=patch_size,
        export_confidence=export_confidence,
        model_version=model_version,
    )

    # Handle confidence output
    if export_confidence:
        mask, confidence = result
    else:
        mask = result

    # The mask is a single uint8 band; drop the input's nodata value because
    # it does not apply to class labels.
    profile.update(
        dtype=np.uint8,
        count=1,
        compress="lzw",
        nodata=None,
    )

    # Create the parent directory unless it is the filesystem root.
    output_dir = os.path.dirname(os.path.abspath(output_path))
    if output_dir and output_dir != os.path.abspath(os.sep):
        os.makedirs(output_dir, exist_ok=True)

    with rasterio.open(output_path, "w", **profile) as dst:
        dst.write(mask.astype(np.uint8), 1)

    # Optionally write confidence map
    if export_confidence:
        # Split off the real extension rather than string-replacing ".tif":
        # replace() returns the mask path itself for names without a lowercase
        # ".tif" (overwriting the mask) and rewrites ".tif" inside directory
        # names.
        root, ext = os.path.splitext(output_path)
        confidence_path = f"{root}_confidence{ext}"
        profile.update(dtype=np.float32)
        with rasterio.open(confidence_path, "w", **profile) as dst:
            # Cast so the array matches the dtype declared in the profile.
            dst.write(confidence.astype(np.float32), 1)