Using Hugging Face Auto Modules with GeoAI¶

This notebook demonstrates how to use the AutoGeoModel and related functions from GeoAI for various geospatial machine learning tasks using Hugging Face models.

Supported Tasks¶

- Zero-shot Object Detection: Detect objects using text prompts (e.g., Grounding DINO)
- Semantic Segmentation: Pixel-level classification (e.g., SegFormer)
- Image Classification: Classify entire images (e.g., ViT)

In [ ]:

Copied!

# Uncomment the next line to install or upgrade the required packages.
# %pip install -U geoai-py transformers

In [ ]:

Copied!





from geoai import download_file
from geoai.auto import (
    AutoGeoModel,
    semantic_segmentation,
    image_classification,
    object_detection,
    get_hf_tasks,
    get_hf_model_config,
    show_image,
    show_detections,
    show_segmentation,
)
from geoai import download_file
from geoai.auto import (
    AutoGeoModel,
    semantic_segmentation,
    image_classification,
    object_detection,
    get_hf_tasks,
    get_hf_model_config,
    show_image,
    show_detections,
    show_segmentation,
)

Download Sample Data¶

Download sample aerial imagery for the examples.

In [ ]:

Copied!

# Sample aerial GeoTIFF hosted on Hugging Face. The value returned by
# download_file is used as the input image path by every later cell, so the
# download only needs to happen once.
image_url = "https://huggingface.co/datasets/giswqs/geospatial/resolve/main/aerial.tif"
image_path = download_file(image_url, "aerial.tif")

Display the Image¶

In [ ]:

Copied!

# Preview the downloaded image before running any model on it.
show_image(image_path, title="Aerial Image")

1. Zero-Shot Object Detection with Grounding DINO¶

Grounding DINO allows you to detect objects using natural language text prompts. This is useful when you need to detect custom objects in aerial/satellite imagery without training a new model.

In [ ]:

Copied!





# Zero-shot detection: the labels are free-text prompts, so no task-specific
# training is needed. Both thresholds are set to the same value (0.25) here.
result = object_detection(
    image_path,
    labels=["building", "tree", "car", "road"],
    box_threshold=0.25,
    text_threshold=0.25,
)

# Fall back to an empty list so the count still prints when no "boxes" key
# is present in the result.
print(f"Detected {len(result.get('boxes', []))} objects")

Visualize Detection Results¶

In [ ]:

Copied!





# Plot the detections from the previous cell on top of the source image.
show_detections(
    image_path,
    result,
    title="Zero-Shot Object Detection with Grounding DINO",
)

Save Detection Results to GeoJSON¶

In [ ]:

Copied!





# Re-run detection, this time also requesting a vector output file.
result = object_detection(
    image_path,
    labels=["building", "tree", "car"],
    box_threshold=0.25,
    output_vector_path="detections.geojson",
)

# Only report success when the result actually carries a "geodataframe" entry.
if "geodataframe" in result:
    print("Detection results saved to detections.geojson")
    print(result["geodataframe"].head())

Using AutoGeoModel Directly¶

For more control, you can use AutoGeoModel directly:

In [ ]:

Copied!





# Load the model explicitly instead of going through the object_detection
# helper; the task name selects the zero-shot detection pipeline.
model = AutoGeoModel.from_pretrained(
    "IDEA-Research/grounding-dino-base",
    task="zero-shot-object-detection",
)

# The text prompt lists the candidate classes as period-separated phrases.
result = model.predict(
    image_path,
    text="a building. a tree. a car.",
    box_threshold=0.25,
    text_threshold=0.25,
)

print("Detection Results:")
if "boxes" in result:
    # Boxes are zipped in to keep the three sequences in lockstep, but only
    # the score and label are printed.
    for _box, score, label in zip(result["boxes"], result["scores"], result["labels"]):
        print(f"  - {label}: {score:.3f}")

2. Semantic Segmentation with SegFormer¶

Semantic segmentation assigns a class label to each pixel in the image. This is useful for land cover classification.

In [ ]:

Copied!





import numpy as np

# Run SegFormer over the image and write the class raster to disk.
seg_result = semantic_segmentation(
    image_path,
    output_path="segmentation_output.tif",
    model_name="nvidia/segformer-b0-finetuned-ade-512-512",
)

# The mask is looked up under "mask" first, then "output".
mask = seg_result.get("mask", seg_result.get("output"))
if mask is None:
    # Fail with a clear message instead of an AttributeError on `.shape`.
    raise KeyError("semantic_segmentation result has neither 'mask' nor 'output'")

print(f"Segmentation mask shape: {mask.shape}")
# np.unique counts distinct class ids without building a Python set of
# per-pixel scalars. Assumes `mask` is a NumPy array (it exposes .shape and
# .flatten()) - TODO confirm.
print(f"Unique classes: {len(np.unique(mask))}")
print("Output saved to: segmentation_output.tif")

Visualize Segmentation Results¶

In [ ]:

Copied!





# Overlay the class mask from the previous cell on the source image.
show_segmentation(
    image_path,
    mask,
    title="Semantic Segmentation (SegFormer)",
    alpha=0.6,
)

Vectorize Segmentation Results¶

In [ ]:

Copied!





# Same segmentation as before, additionally requesting a vector output file.
# min_object_area and simplify_tolerance are passed straight through to
# semantic_segmentation; see its documentation for their units.
seg_result = semantic_segmentation(
    image_path,
    output_path="segmentation_output.tif",
    output_vector_path="segmentation.geojson",
    model_name="nvidia/segformer-b0-finetuned-ade-512-512",
    min_object_area=50,
    simplify_tolerance=1.0,
)

# Only report success when the result actually carries a "geodataframe" entry.
if "geodataframe" in seg_result:
    print("Vectorized segmentation saved to segmentation.geojson")
    print(f"Number of polygons: {len(seg_result['geodataframe'])}")

3. Image Classification with ViT¶

Classify entire images into categories using Vision Transformer (ViT).

In [ ]:

Copied!





import numpy as np
from transformers import AutoConfig

# Single source of truth for the checkpoint, so the label mapping loaded
# below always belongs to the model that produced the prediction.
model_name = "google/vit-base-patch16-224"

cls_result = image_classification(
    image_path,
    model_name=model_name,
)

# Get class labels from model config
config = AutoConfig.from_pretrained(model_name)
id2label = config.id2label

print(f"Predicted class index: {cls_result.get('class')}")
if "probabilities" in cls_result and cls_result["probabilities"] is not None:
    probs = cls_result["probabilities"]
    # argsort is ascending: take the last five entries and reverse them so
    # the most probable class comes first.
    top_indices = np.argsort(probs)[-5:][::-1]
    print("\nTop 5 predictions:")
    for idx in top_indices:
        # Convert the NumPy integer to a plain int before the dict lookup.
        class_id = int(idx)
        label = id2label.get(class_id, f"Class {class_id}")
        print(f"  {label}: {probs[class_id]:.4f}")

4. List Available Tasks¶

In [ ]:

Copied!





# Print every task name returned by get_hf_tasks, one per line.
tasks = get_hf_tasks()
print("Supported tasks:")
for task in tasks:
    print(f"  - {task}")

5. Get Model Configuration¶

In [ ]:

Copied!





# Fetch the model configuration and print a few fields. `.get` is used so a
# missing key prints None instead of raising.
config = get_hf_model_config("nvidia/segformer-b0-finetuned-ade-512-512")
print(f"Model type: {config.get('model_type')}")
print(f"Number of labels: {config.get('num_labels')}")
print(f"Hidden sizes: {config.get('hidden_sizes')}")

Cleanup¶

In [ ]:

Copied!





import os

# Files written to the working directory by the cells above.
files_to_remove = [
    "aerial.tif",
    "segmentation_output.tif",
    "segmentation.geojson",
    "detections.geojson",
]

for f in files_to_remove:
    # EAFP: attempt the removal and skip files that were never created,
    # rather than checking for existence first.
    try:
        os.remove(f)
    except FileNotFoundError:
        continue
    print(f"Removed {f}")