Skip to content

rfdetr module

RF-DETR integration for object detection on geospatial imagery.

This module provides a Python interface to RF-DETR (https://github.com/roboflow/rf-detr), Roboflow's state-of-the-art real-time object detection model built on DINOv2 and DETR. Supports tiled inference on GeoTIFF imagery with georeferenced output, batch processing, training, and HuggingFace Hub integration.

Requirements
  • rfdetr
  • supervision (installed with rfdetr)

Install with:

pip install rfdetr

check_rfdetr_available()

Check if the rfdetr package is installed.

Raises:

Type Description
ImportError

If rfdetr is not installed, with installation instructions.

Source code in geoai/rfdetr.py
116
117
118
119
120
121
122
123
124
125
126
def check_rfdetr_available() -> None:
    """Verify that the optional ``rfdetr`` dependency is usable.

    Returns silently when the module-level ``RFDETR_AVAILABLE`` flag is
    truthy; otherwise raises with installation instructions.

    Raises:
        ImportError: If rfdetr is not installed, with installation instructions.
    """
    if RFDETR_AVAILABLE:
        return

    message = (
        "The rfdetr package is required but not installed. "
        "Install it with: pip install rfdetr"
    )
    raise ImportError(message)

list_rfdetr_models()

List available RF-DETR model variants.

Returns:

Type Description
Dict[str, str]

Dict[str, str]: Dictionary mapping model variant names to their descriptions.

Source code in geoai/rfdetr.py
129
130
131
132
133
134
135
136
def list_rfdetr_models() -> Dict[str, str]:
    """Summarize the RF-DETR variants registered in ``RFDETR_MODELS``.

    Returns:
        Dict[str, str]: Mapping of each model variant name to the
            ``"description"`` entry of its registry record, in registry
            order.
    """
    descriptions: Dict[str, str] = {}
    for variant, spec in RFDETR_MODELS.items():
        descriptions[variant] = spec["description"]
    return descriptions

plot_rfdetr_metrics(metrics_path, figsize=None, save_path=None)

Plot training and validation metrics from an RF-DETR training run.

Reads the metrics.csv file produced by RF-DETR training and plots training loss, validation mAP, F1 score, precision/recall, and per-class AP curves.

Parameters:

Name Type Description Default
metrics_path str

Path to the metrics.csv file in the RF-DETR output directory, or path to the output directory itself.

required
figsize Optional[tuple]

Optional figure size as (width, height) in inches. If None, automatically determined based on number of subplots.

None
save_path Optional[str]

Optional path to save the plot image. If None, the plot is displayed but not saved.

None

Returns:

Type Description
Any

pandas.DataFrame: DataFrame containing the validation metrics, one row per validation epoch.

Source code in geoai/rfdetr.py
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
def plot_rfdetr_metrics(
    metrics_path: str,
    figsize: Optional[tuple] = None,
    save_path: Optional[str] = None,
) -> Any:
    """Plot training and validation metrics from an RF-DETR training run.

    Reads the ``metrics.csv`` file produced by RF-DETR training and plots
    training loss, validation mAP, F1 score, precision/recall, and
    per-class AP curves. Metric columns that are absent from the file are
    skipped rather than raising ``KeyError``.

    Args:
        metrics_path: Path to the ``metrics.csv`` file in the RF-DETR
            output directory, or path to the output directory itself.
        figsize: Optional figure size as ``(width, height)`` in inches.
            If None, automatically determined based on number of subplots.
        save_path: Optional path to save the plot image. If None, the
            plot is displayed but not saved.

    Returns:
        pandas.DataFrame: One row per validation epoch (rows where
        ``val/mAP_50_95`` is present), with ``epoch`` kept as a regular
        column and a fresh 0-based index. Contains whichever of the
        summary metric columns exist in the file, followed by the
        per-class ``val/AP/*`` columns. An empty DataFrame is returned when
        there are no validation rows.

    Raises:
        FileNotFoundError: If the metrics file does not exist.
    """
    # Resolve and validate the path before importing the plotting stack so a
    # wrong path fails fast and cheaply.
    if os.path.isdir(metrics_path):
        metrics_path = os.path.join(metrics_path, "metrics.csv")
    if not os.path.exists(metrics_path):
        raise FileNotFoundError(f"Metrics file not found: {metrics_path}")

    import matplotlib.pyplot as plt
    import pandas as pd
    from matplotlib.ticker import MaxNLocator

    df = pd.read_csv(metrics_path)

    def _rows_with(column):
        # Rows where `column` has a value; an empty frame (same columns) when
        # the column is missing, instead of a KeyError from dropna().
        if column not in df.columns:
            return df.iloc[0:0]
        return df.dropna(subset=[column])

    def _plot_series(ax, frame, series):
        # Draw each (column, label, marker) that is actually available.
        if frame.empty:
            return
        for column, label, marker in series:
            if column in frame.columns:
                ax.plot(frame["epoch"], frame[column], label=label, marker=marker)

    def _decorate(ax, ylabel, title, **legend_kwargs):
        ax.set_xlabel("Epoch")
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        # Only draw a legend when something was plotted; calling legend() on
        # an axes without labelled artists makes matplotlib emit a warning.
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend(**legend_kwargs)
        ax.grid(True, alpha=0.3)

    # Separate train and val rows
    train_df = _rows_with("train/loss")
    val_df = _rows_with("val/mAP_50_95")

    # Identify per-class AP columns
    ap_cols = [c for c in df.columns if c.startswith("val/AP/")]
    class_names = [c.replace("val/AP/", "") for c in ap_cols]

    # Three fixed panels (loss, mAP, F1/precision/recall) plus an optional
    # per-class AP panel, so `axes` is always an array of 3 or 4 axes.
    num_plots = 4 if ap_cols else 3
    if figsize is None:
        figsize = (14, 4 * num_plots)

    fig, axes = plt.subplots(num_plots, 1, figsize=figsize)
    for ax in axes:
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Plot 1: Training and Validation Loss
    _plot_series(axes[0], train_df, [("train/loss", "Train Loss", ".")])
    _plot_series(axes[0], val_df, [("val/loss", "Val Loss", ".")])
    _decorate(axes[0], "Loss", "Training and Validation Loss")

    # Plot 2: mAP Metrics
    _plot_series(
        axes[1],
        val_df,
        [
            ("val/mAP_50_95", "mAP@50:95", "o"),
            ("val/mAP_50", "mAP@50", "s"),
            ("val/mAP_75", "mAP@75", "^"),
        ],
    )
    _decorate(axes[1], "mAP", "Validation mAP")

    # Plot 3: F1, Precision, Recall
    _plot_series(
        axes[2],
        val_df,
        [
            ("val/F1", "F1", "o"),
            ("val/precision", "Precision", "s"),
            ("val/recall", "Recall", "^"),
        ],
    )
    _decorate(axes[2], "Score", "Validation F1 / Precision / Recall")

    # Plot 4: Per-class AP
    if ap_cols:
        _plot_series(
            axes[3],
            val_df,
            [(col, name, ".") for col, name in zip(ap_cols, class_names)],
        )
        _decorate(
            axes[3],
            "AP@50:95",
            "Per-Class AP",
            bbox_to_anchor=(1.05, 1),
            loc="upper left",
            fontsize="small",
        )

    fig.tight_layout()

    if save_path is not None:
        # Save through the figure handle rather than pyplot's "current
        # figure" so the right figure is written regardless of global state.
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        logger.info("Plot saved to: %s", save_path)

    plt.show()

    # Return val metrics as DataFrame
    if val_df.empty:
        return pd.DataFrame()

    summary_cols = [
        "epoch",
        "val/mAP_50_95",
        "val/mAP_50",
        "val/mAR",
        "val/F1",
        "val/precision",
        "val/recall",
    ]
    # Keep only the columns the file really has; selecting a missing column
    # would raise KeyError after all the plotting work is done.
    result_cols = [c for c in summary_cols if c in val_df.columns] + ap_cols
    return val_df[result_cols].reset_index(drop=True)

prepare_nwpu_for_rfdetr(output_dir='nwpu-rfdetr', val_split=0.2, seed=42)

Download and prepare the NWPU-VHR-10 dataset for RF-DETR training.

Downloads the NWPU-VHR-10 remote sensing dataset, converts its text annotations to COCO JSON format, and organizes files into the directory structure expected by RF-DETR (train/ and valid/ subdirectories with _annotations.coco.json files).

The NWPU-VHR-10 dataset contains 650 annotated VHR remote sensing images with 10 object classes: airplane, ship, storage_tank, baseball_diamond, tennis_court, basketball_court, ground_track_field, harbor, bridge, and vehicle.

Parameters:

Name Type Description Default
output_dir str

Directory to create the RF-DETR dataset in. Defaults to "nwpu-rfdetr".

'nwpu-rfdetr'
val_split float

Fraction of data to use for validation. Defaults to 0.2.

0.2
seed int

Random seed for train/val split. Defaults to 42.

42

Returns:

Name Type Description
dict Dict[str, Any]

Dictionary with keys:

  • dataset_dir (str): Path to the prepared dataset directory.
  • class_names (list): List of class name strings.
  • num_classes (int): Number of object classes.
  • train_images (int): Number of training images.
  • val_images (int): Number of validation images.

Source code in geoai/rfdetr.py
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
def prepare_nwpu_for_rfdetr(
    output_dir: str = "nwpu-rfdetr",
    val_split: float = 0.2,
    seed: int = 42,
) -> Dict[str, Any]:
    """Download and prepare the NWPU-VHR-10 dataset for RF-DETR training.

    Downloads the NWPU-VHR-10 remote sensing dataset, converts its text
    annotations to COCO JSON format, and organizes files into the
    directory structure expected by RF-DETR (``train/`` and ``valid/``
    subdirectories with ``_annotations.coco.json`` files).

    The NWPU-VHR-10 dataset contains 650 annotated VHR remote sensing
    images with 10 object classes: airplane, ship, storage_tank,
    baseball_diamond, tennis_court, basketball_court, ground_track_field,
    harbor, bridge, and vehicle.

    The intermediate COCO files are written to ``<output_dir>/_coco_tmp``,
    which is removed again even when preparation fails part-way.

    Args:
        output_dir: Directory to create the RF-DETR dataset in. Defaults
            to "nwpu-rfdetr".
        val_split: Fraction of data to use for validation. Defaults to 0.2.
        seed: Random seed for train/val split. Defaults to 42.

    Returns:
        dict: Dictionary with keys:
            - dataset_dir (str): Absolute path to the prepared dataset
              directory.
            - class_names (list): List of class name strings (without
              "background").
            - num_classes (int): Number of object classes.
            - train_images (int): Number of image entries in the training
              annotations.
            - val_images (int): Number of image entries in the validation
              annotations.
    """
    import shutil

    from .object_detect import download_nwpu_vhr10, prepare_nwpu_vhr10

    # Download dataset
    logger.info("Downloading NWPU-VHR-10 dataset...")
    data_dir = download_nwpu_vhr10()

    # Convert to COCO format
    logger.info("Converting annotations to COCO format...")
    coco_tmp = os.path.join(output_dir, "_coco_tmp")
    image_counts: Dict[str, int] = {}

    try:
        result = prepare_nwpu_vhr10(
            data_dir=data_dir,
            output_dir=coco_tmp,
            val_split=val_split,
            seed=seed,
        )

        # RF-DETR uses 0-indexed class IDs, so class_names should NOT
        # include "background" at index 0. The COCO categories in the
        # annotation files are 1-indexed, and RF-DETR remaps them to
        # 0-indexed internally during training.
        class_names = [
            name for name in result["class_names"] if name != "background"
        ]
        images_dir = result.get(
            "images_dir", os.path.join(data_dir, "positive_image_set")
        )

        # Create RF-DETR directory structure
        for split, ann_key in (
            ("train", "train_annotations"),
            ("valid", "val_annotations"),
        ):
            split_dir = os.path.join(output_dir, split)
            os.makedirs(split_dir, exist_ok=True)

            with open(result[ann_key], "r", encoding="utf-8") as f:
                coco_data = json.load(f)

            # Copy images; files already present in the split directory are
            # left untouched so repeated runs stay cheap.
            missing = 0
            for img in coco_data["images"]:
                dst = os.path.join(split_dir, img["file_name"])
                if os.path.exists(dst):
                    continue
                src = os.path.join(images_dir, img["file_name"])
                if not os.path.exists(src):
                    missing += 1
                    continue
                shutil.copy2(src, dst)
            if missing:
                # The annotations still reference these images, so surface
                # the gap instead of skipping it silently.
                logger.warning(
                    "%d image(s) referenced by the %s annotations were not "
                    "found in %s",
                    missing,
                    split,
                    images_dir,
                )

            # Save annotations
            ann_dst = os.path.join(split_dir, "_annotations.coco.json")
            with open(ann_dst, "w", encoding="utf-8") as f:
                json.dump(coco_data, f)

            # Count from the data already in memory rather than re-reading
            # the file that was just written.
            image_counts[split] = len(coco_data["images"])
    finally:
        # Clean up temporary COCO files, also when something above failed.
        shutil.rmtree(coco_tmp, ignore_errors=True)

    logger.info(
        "NWPU-VHR-10 dataset prepared: %d train, %d val images, %d classes",
        image_counts["train"],
        image_counts["valid"],
        len(class_names),
    )

    return {
        "dataset_dir": os.path.abspath(output_dir),
        "class_names": class_names,
        "num_classes": len(class_names),
        "train_images": image_counts["train"],
        "val_images": image_counts["valid"],
    }

push_rfdetr_to_hub(model_path, repo_id, model_variant='base', num_classes=None, class_names=None, commit_message=None, private=False, token=None)

Push a trained RF-DETR model to Hugging Face Hub.

Uploads the model weights and a config.json file containing model metadata to the specified Hub repository. The repository is created automatically if it does not already exist.

Parameters:

Name Type Description Default
model_path str

Path to the trained model weights file.

required
repo_id str

Hub repository in "username/repo-name" format.

required
model_variant str

RF-DETR model variant name. Stored in config.json so the model can be reconstructed on download. Defaults to "base".

'base'
num_classes Optional[int]

Number of detection classes. If None, inferred from class_names length.

None
class_names Optional[List[str]]

Ordered list of class name strings. Stored in config.json for downstream use.

None
commit_message Optional[str]

Commit message for the Hub upload. Defaults to a descriptive string.

None
private bool

Whether to create a private repository. Defaults to False.

False
token Optional[str]

Hugging Face API token with write access. If None, uses the token stored by huggingface-cli login.

None

Returns:

Name Type Description
str Optional[str]

URL of the uploaded repository, or None if huggingface_hub is not installed or the upload fails.

Source code in geoai/rfdetr.py
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
def push_rfdetr_to_hub(
    model_path: str,
    repo_id: str,
    model_variant: str = "base",
    num_classes: Optional[int] = None,
    class_names: Optional[List[str]] = None,
    commit_message: Optional[str] = None,
    private: bool = False,
    token: Optional[str] = None,
) -> Optional[str]:
    """Push a trained RF-DETR model to Hugging Face Hub.

    Uploads the model weights (as ``weights.pth``) and a ``config.json``
    file containing model metadata to the specified Hub repository. The
    repository is created automatically if it does not already exist.

    Failures are logged and reported through a ``None`` return value; this
    function does not raise for a missing weights file, a missing
    ``huggingface_hub`` installation, or an upload error.

    Args:
        model_path: Path to the trained model weights file.
        repo_id: Hub repository in "username/repo-name" format.
        model_variant: RF-DETR model variant name. Stored in config.json
            so the model can be reconstructed on download. Defaults to
            "base".
        num_classes: Number of detection classes. If None, inferred from
            class_names length.
        class_names: Ordered list of class name strings. Stored in
            config.json for downstream use.
        commit_message: Commit message for the Hub upload. Defaults to
            a descriptive string.
        private: Whether to create a private repository. Defaults to False.
        token: Hugging Face API token with write access. If None, uses
            the token stored by ``huggingface-cli login``.

    Returns:
        str: URL of the uploaded repository, or None if the weights file
        does not exist, huggingface_hub is not installed, or the upload
        fails.
    """
    import shutil

    # Validate the local input first: otherwise a typo in model_path is only
    # noticed after the (possibly brand-new) Hub repository was created.
    if not os.path.isfile(model_path):
        logger.error("Model weights file not found: %s", model_path)
        return None

    try:
        from huggingface_hub import HfApi, create_repo
    except ImportError:
        logger.error(
            "huggingface_hub is required to push models. "
            "Install it with: pip install huggingface-hub"
        )
        return None

    if num_classes is None and class_names is not None:
        num_classes = len(class_names)

    if commit_message is None:
        commit_message = f"Upload RF-DETR {model_variant} object detection model"

    config: Dict[str, Any] = {
        "model_type": "rfdetr",
        "model_variant": model_variant,
        "num_classes": num_classes,
        "class_names": class_names,
        # None when the variant is not in the RFDETR_MODELS registry.
        "resolution": RFDETR_MODELS.get(model_variant, {}).get("resolution"),
    }

    try:
        api = HfApi(token=token)
        create_repo(repo_id, private=private, token=token, exist_ok=True)

        # Stage weights + config in a scratch directory so both are uploaded
        # in a single commit.
        with tempfile.TemporaryDirectory() as tmpdir:
            shutil.copy2(model_path, os.path.join(tmpdir, "weights.pth"))

            config_path = os.path.join(tmpdir, "config.json")
            with open(config_path, "w") as f:
                json.dump(config, f, indent=2)

            api.upload_folder(
                folder_path=tmpdir,
                repo_id=repo_id,
                commit_message=commit_message,
                token=token,
            )

        url = f"https://huggingface.co/{repo_id}"
        logger.info("Model successfully pushed to: %s", url)
        return url
    except Exception as e:
        # Deliberate best-effort boundary: report the failure to the caller
        # through the return value instead of propagating Hub/network errors.
        logger.error("Failed to push model to Hub: %s", e)
        return None

rfdetr_detect(input_path, output_path=None, model_variant='base', pretrain_weights=None, confidence_threshold=0.5, nms_threshold=0.3, window_size=None, overlap=None, batch_size=4, class_names=None, device=None, **kwargs)

Perform object detection on a GeoTIFF using RF-DETR.

Runs tiled inference with a sliding window approach on a GeoTIFF image using an RF-DETR model. Detections from overlapping tiles are merged using class-aware Non-Maximum Suppression (NMS). Results are returned as a georeferenced GeoDataFrame.

Parameters:

Name Type Description Default
input_path str

Path to input GeoTIFF image.

required
output_path Optional[str]

Optional path to save the output GeoDataFrame (as GeoJSON, GPKG, or Shapefile based on extension). If None, results are only returned in memory.

None
model_variant str

RF-DETR model variant to use. See list_rfdetr_models() for available options. Defaults to "base".

'base'
pretrain_weights Optional[str]

Path to custom pretrained weights file. If None, uses COCO pretrained weights.

None
confidence_threshold float

Minimum confidence score for detections. Defaults to 0.5.

0.5
nms_threshold float

IoU threshold for Non-Maximum Suppression across tiles. Defaults to 0.3.

0.3
window_size Optional[int]

Size of the sliding window in pixels. Defaults to the model's native resolution.

None
overlap Optional[int]

Overlap between adjacent windows in pixels. Defaults to window_size // 4.

None
batch_size int

Number of tiles to process in each batch. Defaults to 4.

4
class_names Optional[List[str]]

Optional list of class names for labeling detections. If None and using COCO pretrained weights, COCO class names are used.

None
device Optional[str]

Device to use ("cpu", "cuda", "mps"). If None, auto-detected.

None
**kwargs Any

Additional keyword arguments passed to the model constructor.

{}

Returns:

Type Description
Any

geopandas.GeoDataFrame: GeoDataFrame with columns: geometry, class_id, class_name, confidence. The geometry column contains georeferenced bounding box polygons.

Source code in geoai/rfdetr.py
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
def rfdetr_detect(
    input_path: str,
    output_path: Optional[str] = None,
    model_variant: str = "base",
    pretrain_weights: Optional[str] = None,
    confidence_threshold: float = 0.5,
    nms_threshold: float = 0.3,
    window_size: Optional[int] = None,
    overlap: Optional[int] = None,
    batch_size: int = 4,
    class_names: Optional[List[str]] = None,
    device: Optional[str] = None,
    **kwargs: Any,
) -> Any:
    """Perform object detection on a GeoTIFF using RF-DETR.

    Runs tiled inference with a sliding window approach on a GeoTIFF image
    using an RF-DETR model. Detections from overlapping tiles are merged
    using class-aware Non-Maximum Suppression (NMS). Results are returned
    as a georeferenced GeoDataFrame.

    Only the first three bands are used (missing bands are zero-filled) and
    pixel values are cast to ``uint8`` without rescaling, so imagery with a
    wider value range should be converted to 8-bit beforehand.

    Args:
        input_path: Path to input GeoTIFF image.
        output_path: Optional path to save the output GeoDataFrame
            (as GeoJSON, GPKG, or Shapefile based on extension). If None,
            results are only returned in memory.
        model_variant: RF-DETR model variant to use. See
            ``list_rfdetr_models()`` for available options. Defaults to
            "base".
        pretrain_weights: Path to custom pretrained weights file. If None,
            uses COCO pretrained weights.
        confidence_threshold: Minimum confidence score for detections.
            Defaults to 0.5.
        nms_threshold: IoU threshold for Non-Maximum Suppression across
            tiles. Defaults to 0.3.
        window_size: Size of the sliding window in pixels. Defaults to
            the model's native resolution.
        overlap: Overlap between adjacent windows in pixels. Defaults to
            ``window_size // 4``.
        batch_size: Number of tiles to process in each batch. Defaults to 4.
        class_names: Optional list of class names for labeling detections,
            indexed by class id. If None, ``model.class_names`` is used.
        device: Device to use ("cpu", "cuda", "mps"). If None, auto-detected.
        **kwargs: Additional keyword arguments passed to the model constructor.

    Returns:
        geopandas.GeoDataFrame: GeoDataFrame with columns: geometry,
        class_id, class_name, confidence. The geometry column contains
        georeferenced bounding box polygons.

    Raises:
        ImportError: If the rfdetr package is not installed.
        ValueError: If ``overlap`` is not smaller than ``window_size``.
    """
    from collections.abc import Mapping

    import geopandas as gpd
    import rasterio
    import torch
    import torchvision
    from PIL import Image
    from rasterio.windows import Window
    from shapely.geometry import Polygon
    from tqdm import tqdm

    check_rfdetr_available()

    model = _create_rfdetr_model(
        model_variant=model_variant,
        pretrain_weights=pretrain_weights,
        device=device,
        **kwargs,
    )

    # Resolve window_size and overlap
    if window_size is None:
        window_size = RFDETR_MODELS[model_variant]["resolution"]
    if overlap is None:
        overlap = window_size // 4

    # Validate the tiling geometry before touching the raster.
    stride = window_size - overlap
    if stride <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be less than window_size "
            f"({window_size})."
        )

    # Get class names from model if not provided
    if class_names is None:
        class_names = model.class_names

    all_boxes = []
    all_scores = []
    all_class_ids = []

    with rasterio.open(input_path) as src:
        height = src.height
        width = src.width
        crs = src.crs
        transform = src.transform

        steps_y = max(1, math.ceil((height - window_size) / stride) + 1)
        steps_x = max(1, math.ceil((width - window_size) / stride) + 1)
        total_windows = steps_y * steps_x

        logger.info(
            "Processing %d windows (%dx%d, overlap=%d) on %s",
            total_windows,
            window_size,
            window_size,
            overlap,
            os.path.basename(input_path),
        )

        pbar = tqdm(total=total_windows, desc="RF-DETR detection")
        batch_images = []
        batch_positions = []
        start_time = time.time()

        def _flush_batch() -> None:
            # Run inference on the queued tiles and collect their detections
            # in global pixel coordinates.
            if not batch_images:
                return

            detections_list = model.predict(
                batch_images,
                threshold=confidence_threshold,
            )

            # predict returns single Detections if single image
            if not isinstance(detections_list, list):
                detections_list = [detections_list]

            for det, (x_pos, y_pos, w, h) in zip(detections_list, batch_positions):
                if len(det.xyxy) == 0:
                    continue

                # Boxes are expressed in the pixel frame of the tile that was
                # passed in (window_size x window_size, including padding).
                boxes = det.xyxy.copy()

                # Clip boxes to the actual tile area (exclude padding region
                # for edge tiles): x columns to [0, w], y columns to [0, h].
                boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, w)
                boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, h)

                # Skip detections that collapsed after clipping
                valid = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
                if not valid.any():
                    continue
                boxes = boxes[valid]

                # Offset to global pixel coordinates
                boxes[:, 0::2] += x_pos
                boxes[:, 1::2] += y_pos

                all_boxes.append(boxes)
                all_scores.append(det.confidence[valid])
                all_class_ids.append(det.class_id[valid])

            pbar.update(len(batch_images))
            batch_images.clear()
            batch_positions.clear()

        try:
            for i in range(steps_y):
                # Clamp so the last row/column of windows ends at the image
                # border instead of running past it.
                y = min(i * stride, max(0, height - window_size))

                for j in range(steps_x):
                    x = min(j * stride, max(0, width - window_size))

                    actual_w = min(window_size, width - x)
                    actual_h = min(window_size, height - y)

                    window = src.read(window=Window(x, y, actual_w, actual_h))

                    if window.shape[1] == 0 or window.shape[2] == 0:
                        pbar.update(1)
                        continue

                    # Extract RGB bands (first 3) and transpose to HWC
                    if window.shape[0] >= 3:
                        rgb = window[:3]
                    else:
                        # Pad to 3 channels if needed
                        rgb = np.zeros(
                            (3, window.shape[1], window.shape[2]),
                            dtype=window.dtype,
                        )
                        rgb[: window.shape[0]] = window
                    tile_hwc = np.transpose(rgb, (1, 2, 0))

                    # Pad to full window_size if tile is smaller (edge tiles)
                    if actual_h < window_size or actual_w < window_size:
                        padded = np.zeros(
                            (window_size, window_size, 3), dtype=tile_hwc.dtype
                        )
                        padded[:actual_h, :actual_w] = tile_hwc
                        tile_hwc = padded

                    # NOTE(review): plain cast, no rescaling - values outside
                    # 0-255 (e.g. 16-bit imagery) are not normalized here.
                    pil_image = Image.fromarray(tile_hwc.astype(np.uint8))
                    batch_images.append(pil_image)
                    batch_positions.append((x, y, actual_w, actual_h))

                    if len(batch_images) == batch_size:
                        _flush_batch()

            # Process whatever is still queued. Doing this after the loops
            # (rather than on the last iteration) guarantees no tile is
            # dropped when the final window is skipped above.
            _flush_batch()
        finally:
            pbar.close()

        elapsed = time.time() - start_time
        logger.info("Tiled inference completed in %.1f seconds", elapsed)

    # Merge detections from all tiles with class-aware NMS
    if all_boxes:
        merged_boxes = np.concatenate(all_boxes, axis=0)
        merged_scores = np.concatenate(all_scores, axis=0)
        merged_class_ids = np.concatenate(all_class_ids, axis=0)

        logger.info("Collected %d detections before NMS", len(merged_boxes))

        keep = torchvision.ops.batched_nms(
            torch.tensor(merged_boxes, dtype=torch.float32),
            torch.tensor(merged_scores, dtype=torch.float32),
            torch.tensor(merged_class_ids, dtype=torch.int64),
            nms_threshold,
        ).numpy()

        final_boxes = merged_boxes[keep]
        final_scores = merged_scores[keep]
        final_class_ids = merged_class_ids[keep]

        logger.info("After NMS: %d detections", len(final_boxes))
    else:
        final_boxes = np.empty((0, 4))
        final_scores = np.empty((0,))
        final_class_ids = np.empty((0,), dtype=int)

    def _class_label(class_id: int) -> str:
        # NOTE(review): model.class_names appears to be an id -> name mapping
        # in some rfdetr releases - confirm against the pinned version. A
        # mapping is looked up by key; a sequence is indexed by position.
        if isinstance(class_names, Mapping):
            return class_names.get(class_id, f"class_{class_id}")
        # The lower bound keeps a negative id from wrapping around to the
        # end of the list.
        if class_names and 0 <= class_id < len(class_names):
            return class_names[class_id]
        return f"class_{class_id}"

    # Convert pixel coordinates to georeferenced polygons
    records = []
    for box, raw_class_id, raw_score in zip(
        final_boxes, final_class_ids, final_scores
    ):
        c0, r0, c1, r1 = box
        class_id = int(raw_class_id)

        # Convert pixel corners (col, row) to georeferenced coordinates
        corners = [(c0, r0), (c1, r0), (c1, r1), (c0, r1)]
        pts = [transform * (float(c), float(r)) for c, r in corners]

        records.append(
            {
                "geometry": Polygon(pts),
                "class_id": class_id,
                "class_name": _class_label(class_id),
                "confidence": float(raw_score),
            }
        )

    if records:
        gdf = gpd.GeoDataFrame(records, crs=crs)
    else:
        gdf = gpd.GeoDataFrame(
            columns=["geometry", "class_id", "class_name", "confidence"],
            geometry="geometry",
            crs=crs,
        )

    if output_path is not None:
        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
        gdf.to_file(output_path)
        logger.info("Saved %d detections to %s", len(gdf), output_path)

    return gdf

rfdetr_detect_batch(input_paths, output_dir=None, model_variant='base', pretrain_weights=None, confidence_threshold=0.5, nms_threshold=0.3, window_size=None, overlap=None, batch_size=4, class_names=None, device=None, **kwargs)

Perform batch object detection on multiple GeoTIFFs using RF-DETR.

Processes multiple GeoTIFF images using tiled RF-DETR inference and returns a combined GeoDataFrame with all detections.

Parameters:

Name Type Description Default
input_paths Union[str, List[str]]

Either a list of file paths or a glob pattern string (e.g., "images/*.tif") matching GeoTIFF files.

required
output_dir Optional[str]

Optional directory to save per-image detection results. Each output file is named after the input with a "_detections" suffix. If None, results are only returned in memory.

None
model_variant str

RF-DETR model variant to use. Defaults to "base".

'base'
pretrain_weights Optional[str]

Path to custom pretrained weights file.

None
confidence_threshold float

Minimum confidence score. Defaults to 0.5.

0.5
nms_threshold float

IoU threshold for NMS. Defaults to 0.3.

0.3
window_size Optional[int]

Sliding window size in pixels.

None
overlap Optional[int]

Overlap between adjacent windows in pixels.

None
batch_size int

Number of tiles per inference batch. Defaults to 4.

4
class_names Optional[List[str]]

Optional list of class names.

None
device Optional[str]

Device to use. If None, auto-detected.

None
**kwargs Any

Additional keyword arguments passed to the model constructor.

{}

Returns:

Type Description
Any

geopandas.GeoDataFrame: Combined GeoDataFrame with detections from all images. Includes an additional "source_file" column indicating which image each detection came from.

Source code in geoai/rfdetr.py
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
def rfdetr_detect_batch(
    input_paths: Union[str, List[str]],
    output_dir: Optional[str] = None,
    model_variant: str = "base",
    pretrain_weights: Optional[str] = None,
    confidence_threshold: float = 0.5,
    nms_threshold: float = 0.3,
    window_size: Optional[int] = None,
    overlap: Optional[int] = None,
    batch_size: int = 4,
    class_names: Optional[List[str]] = None,
    device: Optional[str] = None,
    **kwargs: Any,
) -> Any:
    """Perform batch object detection on multiple GeoTIFFs using RF-DETR.

    Runs ``rfdetr_detect`` once per input image and returns a single
    GeoDataFrame holding the detections from every image.

    Args:
        input_paths: Either a list of file paths, a single file path, a
            directory (all ``*.tif`` / ``*.tiff`` files directly inside it
            are used), or a glob pattern string (e.g., "images/*.tif").
            An existing file or directory always takes precedence over a
            glob interpretation, so literal names containing ``*``, ``?``
            or ``[`` are handled correctly.
        output_dir: Optional directory to save per-image detection results.
            Each output file is named ``<input base name>_detections.geojson``.
            Inputs that share a base name therefore map to the same output
            file. If None, results are only returned in memory.
        model_variant: RF-DETR model variant to use. Defaults to "base".
        pretrain_weights: Path to custom pretrained weights file.
        confidence_threshold: Minimum confidence score. Defaults to 0.5.
        nms_threshold: IoU threshold for NMS. Defaults to 0.3.
        window_size: Sliding window size in pixels.
        overlap: Overlap between adjacent windows in pixels.
        batch_size: Number of tiles per inference batch. Defaults to 4.
        class_names: Optional list of class names.
        device: Device to use. If None, auto-detected.
        **kwargs: Additional keyword arguments forwarded unchanged to
            ``rfdetr_detect``.

    Returns:
        geopandas.GeoDataFrame: Combined GeoDataFrame with detections from
        all images. Includes an additional "source_file" column indicating
        which image each detection came from. When the per-image results
        carry different CRSs, they are reprojected to the first CRS
        encountered so that they can be combined.

    Raises:
        ImportError: If the rfdetr package is not installed.
        FileNotFoundError: If a string ``input_paths`` resolves to no files.
        ValueError: If ``input_paths`` is an empty list.
    """
    import geopandas as gpd
    import pandas as pd

    check_rfdetr_available()

    # Resolve input paths
    if isinstance(input_paths, str):
        if os.path.isdir(input_paths):
            # Escape the directory part so glob metacharacters in its name
            # are matched literally.
            dir_pattern = _glob.escape(input_paths)
            tif_paths = _glob.glob(os.path.join(dir_pattern, "*.tif"))
            tiff_paths = _glob.glob(os.path.join(dir_pattern, "*.tiff"))
            resolved_paths = sorted(set(tif_paths + tiff_paths))
        elif not os.path.isfile(input_paths) and any(
            ch in input_paths for ch in ("*", "?", "[")
        ):
            resolved_paths = sorted(_glob.glob(input_paths))
        else:
            # A literal path; existence is left for rfdetr_detect to report.
            resolved_paths = [input_paths]
        if not resolved_paths:
            raise FileNotFoundError(f"No input files found for: {input_paths}")
    else:
        resolved_paths = list(input_paths)

    if not resolved_paths:
        raise ValueError("No input files provided.")

    logger.info("Processing %d images with RF-DETR", len(resolved_paths))

    # The output directory does not depend on the image, so create it once.
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)

    all_gdfs = []
    for path in resolved_paths:
        logger.info("Processing: %s", os.path.basename(path))

        output_path = None
        if output_dir is not None:
            base = os.path.splitext(os.path.basename(path))[0]
            output_path = os.path.join(output_dir, f"{base}_detections.geojson")

        gdf = rfdetr_detect(
            input_path=path,
            output_path=output_path,
            model_variant=model_variant,
            pretrain_weights=pretrain_weights,
            confidence_threshold=confidence_threshold,
            nms_threshold=nms_threshold,
            window_size=window_size,
            overlap=overlap,
            batch_size=batch_size,
            class_names=class_names,
            device=device,
            **kwargs,
        )
        gdf["source_file"] = os.path.basename(path)
        all_gdfs.append(gdf)

    # Pick the first defined CRS as the common CRS of the combined result.
    # getattr() guards against frames whose CRS cannot be queried (e.g. no
    # active geometry column).
    target_crs = None
    for gdf in all_gdfs:
        target_crs = getattr(gdf, "crs", None)
        if target_crs is not None:
            break

    # geopandas refuses to concatenate frames with different CRSs, so bring
    # every georeferenced frame into the common CRS first.
    if target_crs is not None:
        aligned = []
        for gdf in all_gdfs:
            gdf_crs = getattr(gdf, "crs", None)
            if gdf_crs is not None and gdf_crs != target_crs:
                gdf = gdf.to_crs(target_crs)
            aligned.append(gdf)
        all_gdfs = aligned

    # resolved_paths is guaranteed non-empty here, so all_gdfs is too.
    combined = gpd.GeoDataFrame(pd.concat(all_gdfs, ignore_index=True))
    if target_crs is not None:
        combined = combined.set_crs(target_crs)

    logger.info(
        "Batch detection complete: %d total detections from %d images",
        len(combined),
        len(resolved_paths),
    )
    return combined

rfdetr_detect_from_hub(input_path, repo_id, filename='weights.pth', output_path=None, confidence_threshold=0.5, nms_threshold=0.3, window_size=None, overlap=None, batch_size=4, device=None, token=None, **kwargs)

Run RF-DETR detection using a model from Hugging Face Hub.

Downloads a trained RF-DETR model from HuggingFace Hub and runs tiled detection on a GeoTIFF image.

Parameters:

Name Type Description Default
input_path str

Path to input GeoTIFF image.

required
repo_id str

HuggingFace Hub repository in "username/repo-name" format.

required
filename str

Model weights filename in the repository. Defaults to "weights.pth".

'weights.pth'
output_path Optional[str]

Optional path to save the output GeoDataFrame.

None
confidence_threshold float

Minimum confidence score. Defaults to 0.5.

0.5
nms_threshold float

IoU threshold for NMS. Defaults to 0.3.

0.3
window_size Optional[int]

Sliding window size in pixels.

None
overlap Optional[int]

Overlap between adjacent windows in pixels.

None
batch_size int

Number of tiles per inference batch. Defaults to 4.

4
device Optional[str]

Device to use. If None, auto-detected.

None
token Optional[str]

HuggingFace API token for private repositories.

None
**kwargs Any

Additional keyword arguments passed to rfdetr_detect.

{}

Returns:

Type Description
Any

geopandas.GeoDataFrame: GeoDataFrame with georeferenced detections.

Source code in geoai/rfdetr.py
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
def rfdetr_detect_from_hub(
    input_path: str,
    repo_id: str,
    filename: str = "weights.pth",
    output_path: Optional[str] = None,
    confidence_threshold: float = 0.5,
    nms_threshold: float = 0.3,
    window_size: Optional[int] = None,
    overlap: Optional[int] = None,
    batch_size: int = 4,
    device: Optional[str] = None,
    token: Optional[str] = None,
    **kwargs: Any,
) -> Any:
    """Run RF-DETR detection using a model from Hugging Face Hub.

    Downloads the weights file (and, if present, ``config.json``) from the
    given Hub repository and runs ``rfdetr_detect`` on a GeoTIFF image with
    those weights.

    Args:
        input_path: Path to input GeoTIFF image.
        repo_id: HuggingFace Hub repository in "username/repo-name" format.
        filename: Model weights filename in the repository. Defaults to
            "weights.pth".
        output_path: Optional path to save the output GeoDataFrame.
        confidence_threshold: Minimum confidence score. Defaults to 0.5.
        nms_threshold: IoU threshold for NMS. Defaults to 0.3.
        window_size: Sliding window size in pixels.
        overlap: Overlap between adjacent windows in pixels.
        batch_size: Number of tiles per inference batch. Defaults to 4.
        device: Device to use. If None, auto-detected.
        token: HuggingFace API token for private repositories.
        **kwargs: Additional keyword arguments passed to rfdetr_detect.
            ``model_variant`` and ``class_names`` may be supplied here to
            override the values read from the repository's ``config.json``.

    Returns:
        geopandas.GeoDataFrame: GeoDataFrame with georeferenced detections.

    Raises:
        ImportError: If rfdetr or huggingface_hub is not installed.
    """
    check_rfdetr_available()

    try:
        from huggingface_hub import hf_hub_download
    except ImportError as exc:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "huggingface_hub is required. "
            "Install it with: pip install huggingface-hub"
        ) from exc

    # Download weights
    weights_path = hf_hub_download(repo_id=repo_id, filename=filename, token=token)

    # Try to download config; fall back to defaults when it is unavailable.
    model_variant = "base"
    class_names = None
    try:
        config_path = hf_hub_download(
            repo_id=repo_id, filename="config.json", token=token
        )
        # JSON is UTF-8 by specification; do not rely on the locale encoding.
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        model_variant = config.get("model_variant", "base")
        class_names = config.get("class_names", None)
    except Exception as exc:
        # Deliberately best-effort: a missing or malformed config must not
        # prevent detection, but the reason is reported for diagnosis.
        logger.warning(
            "Could not download or parse config.json from %s (%s). "
            "Using defaults.",
            repo_id,
            exc,
        )

    # Explicit caller overrides win over the repository config. Popping them
    # also avoids a duplicate-keyword TypeError in the call below.
    model_variant = kwargs.pop("model_variant", model_variant)
    class_names = kwargs.pop("class_names", class_names)

    return rfdetr_detect(
        input_path=input_path,
        output_path=output_path,
        model_variant=model_variant,
        pretrain_weights=weights_path,
        confidence_threshold=confidence_threshold,
        nms_threshold=nms_threshold,
        window_size=window_size,
        overlap=overlap,
        batch_size=batch_size,
        class_names=class_names,
        device=device,
        **kwargs,
    )

rfdetr_train(dataset_dir, model_variant='base', epochs=100, batch_size=4, output_dir='output', pretrain_weights=None, device=None, **kwargs)

Train an RF-DETR model on a COCO or YOLO format dataset.

Wraps the RF-DETR training API for fine-tuning on custom geospatial datasets. Supports COCO JSON and YOLO format annotations.

Parameters:

Name Type Description Default
dataset_dir str

Path to the dataset directory. For COCO format, must contain train/_annotations.coco.json. For YOLO format, must contain data.yaml.

required
model_variant str

RF-DETR model variant to use. Defaults to "base".

'base'
epochs int

Number of training epochs. Defaults to 100.

100
batch_size int

Training batch size. Defaults to 4.

4
output_dir str

Directory to save checkpoints and logs. Defaults to "output".

'output'
pretrain_weights Optional[str]

Path to custom pretrained weights to start from. If None, uses COCO pretrained weights.

None
device Optional[str]

Device to use ("cpu", "cuda", "mps"). If None, auto-detected.

None
**kwargs Any

Additional keyword arguments forwarded to RF-DETR's TrainConfig (e.g., lr, lr_encoder, use_ema, grad_accum_steps, warmup_epochs, early_stopping, multi_scale).

{}

Returns:

Name Type Description
str str

Path to the output directory containing the best checkpoint and training logs.

Source code in geoai/rfdetr.py
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
def rfdetr_train(
    dataset_dir: str,
    model_variant: str = "base",
    epochs: int = 100,
    batch_size: int = 4,
    output_dir: str = "output",
    pretrain_weights: Optional[str] = None,
    device: Optional[str] = None,
    **kwargs: Any,
) -> str:
    """Fine-tune an RF-DETR model on a COCO or YOLO format dataset.

    Builds the requested model variant and hands the dataset location and
    training options to the model's ``train`` method.

    Args:
        dataset_dir: Path to the dataset directory. For COCO format, must
            contain ``train/_annotations.coco.json``. For YOLO format,
            must contain ``data.yaml``.
        model_variant: RF-DETR model variant to use. Defaults to "base".
        epochs: Number of training epochs. Defaults to 100.
        batch_size: Training batch size. Defaults to 4.
        output_dir: Directory to save checkpoints and logs. Defaults to
            "output".
        pretrain_weights: Path to custom pretrained weights to start from.
            If None, uses COCO pretrained weights.
        device: Device to use ("cpu", "cuda", "mps"). If None, auto-detected.
        **kwargs: Additional keyword arguments forwarded to RF-DETR's
            TrainConfig (e.g., lr, lr_encoder, use_ema, grad_accum_steps,
            warmup_epochs, early_stopping, multi_scale).

    Returns:
        str: The ``output_dir`` that was passed in, i.e. the directory
        holding the best checkpoint and training logs.
    """
    check_rfdetr_available()

    detector = _create_rfdetr_model(
        model_variant=model_variant,
        pretrain_weights=pretrain_weights,
        device=device,
    )

    # The explicit arguments come first; extra options are layered on top.
    fit_options = {
        "dataset_dir": dataset_dir,
        "epochs": epochs,
        "batch_size": batch_size,
        "output_dir": output_dir,
        **kwargs,
    }

    logger.info(
        "Starting RF-DETR training: variant=%s, epochs=%d, batch_size=%d",
        model_variant,
        epochs,
        batch_size,
    )
    detector.train(**fit_options)
    logger.info("Training complete. Output saved to: %s", output_dir)

    return output_dir