# Spaces: Runtime error -- page-header residue captured alongside the source (apparently a status banner); not program text.
import gradio as gr | |
from time import time | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.utils import Bunch | |
from sklearn.datasets import fetch_species_distributions | |
from sklearn import svm, metrics | |
from typing import Union | |
# Basemap is an optional dependency: record whether it could be imported so
# the plotting code can fall back to drawing coastlines from the coverage data.
try:
    from mpl_toolkits.basemap import Basemap
except ImportError:
    basemap = False
else:
    basemap = True
def construct_grids(batch):
    """Construct the map grid from the batch object.

    Parameters
    ----------
    batch : Bunch
        The object returned by :func:`fetch_species_distributions`. Only its
        ``x_left_lower_corner``, ``y_left_lower_corner``, ``grid_size``,
        ``Nx`` and ``Ny`` attributes are read.

    Returns
    -------
    (xgrid, ygrid) : 1-D arrays
        The grid corresponding to the values in batch.coverages. ``xgrid``
        always has exactly ``batch.Nx`` entries and ``ygrid`` exactly
        ``batch.Ny`` entries.
    """
    # x,y coordinates of the first grid cell: one grid step past the
    # lower-left corner.
    xmin = batch.x_left_lower_corner + batch.grid_size
    ymin = batch.y_left_lower_corner + batch.grid_size

    # Build the coordinates from integer cell indices rather than
    # np.arange(start, stop, float_step): with a fractional step, rounding in
    # (stop - start) / step can yield one element too many, which would no
    # longer match the (Ny, Nx) shape of the coverage layers.
    # x coordinates of the grid cells
    xgrid = xmin + batch.grid_size * np.arange(batch.Nx)
    # y coordinates of the grid cells
    ygrid = ymin + batch.grid_size * np.arange(batch.Ny)

    return (xgrid, ygrid)
def create_species_bunch(species_name, train, test, coverages, xgrid, ygrid):
    """Collect everything known about one species into a single Bunch.

    The train and test record arrays are filtered down to the rows whose
    ``"species"`` field equals the ASCII-encoded ``species_name``; for each
    split the matching points and their coverage values are stored.

    The returned Bunch carries:

    - ``name``: the first two underscore-separated parts of ``species_name``
      joined by a space,
    - ``pts_train`` / ``pts_test``: the filtered records,
    - ``cov_train`` / ``cov_test``: coverage values looked up at each point
      (transposed, so the first axis runs over points).
    """
    display_name = " ".join(species_name.split("_")[:2])
    bunch = Bunch(name=display_name)

    # The "species" field is compared against bytes, not str.
    encoded_name = species_name.encode("ascii")

    for label, records in (("test", test), ("train", train)):
        # keep only the observations of the requested species
        selected = records[records["species"] == encoded_name]
        bunch["pts_%s" % label] = selected

        # locate each observation on the grid, then read its coverage values
        col = np.searchsorted(xgrid, selected["dd long"])
        row = np.searchsorted(ygrid, selected["dd lat"])
        bunch["cov_%s" % label] = coverages[:, -row, col].T

    return bunch
def translate_choice(choice: str) -> Union[str, tuple[str, str]]:
    """Turn a UI label into the dataset's species identifier(s).

    The two known species labels map to their single identifier; any other
    value (for example ``"Both"``) yields the pair of both identifiers.
    """
    known_species = (
        ("Bradypus variegatus", "bradypus_variegatus_0"),
        ("Microryzomys minutus", "microryzomys_minutus_0"),
    )
    for label, identifier in known_species:
        if choice == label:
            return identifier
    # Anything that is not a single known species selects both of them.
    return ("bradypus_variegatus_0", "microryzomys_minutus_0")
def plot_species_distribution(choice: str):
    """
    Plot the species distribution.

    For every selected species a OneClassSVM is fitted on the standardized
    training coverages, its decision function is evaluated on all land cells
    and drawn as filled contours, the train/test observations are scattered
    on top, and the AUC against random background points is written on the
    panel.

    Parameters
    ----------
    choice : str
        Label picked in the UI. It is resolved through
        :func:`translate_choice`; when that yields a tuple, one panel per
        species is drawn side by side.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module; its current figure holds the plot.
    """
    selection = translate_choice(choice)

    t0 = time()

    # Load the compressed data
    data = fetch_species_distributions()

    # Set up the data grid
    xgrid, ygrid = construct_grids(data)

    # The grid in x,y coordinates
    X, Y = np.meshgrid(xgrid, ygrid[::-1])

    # create a bunch for each selected species (one or two)
    if not isinstance(selection, tuple):
        selection = (selection,)
    species_bunches = [
        create_species_bunch(
            species_id, data.train, data.test, data.coverages, xgrid, ygrid
        )
        for species_id in selection
    ]

    # background points (grid coordinates) for evaluation
    np.random.seed(13)
    background_points = np.c_[
        np.random.randint(low=0, high=data.Ny, size=10000),
        np.random.randint(low=0, high=data.Nx, size=10000),
    ].T

    # We'll make use of the fact that coverages[6] has measurements at all
    # land points. This will help us decide between land and water.
    land_reference = data.coverages[6]

    # The land mask and the land coverages do not depend on the species, so
    # compute them once instead of once per loop iteration.
    idx = np.where(land_reference > -9999)
    coverages_land = data.coverages[:, idx[0], idx[1]].T

    # pyplot draws into one implicit "current figure" that outlives this call.
    # Clear it first so a new request does not stack its axes and colorbars on
    # top of whatever the previous request drew.
    plt.clf()

    # Fit, predict, and plot for each species.
    for i, species in enumerate(species_bunches):
        print("_" * 80)
        print("Modeling distribution of species '%s'" % species.name)

        # Standardize features
        mean = species.cov_train.mean(axis=0)
        std = species.cov_train.std(axis=0)
        train_cover_std = (species.cov_train - mean) / std

        # Fit OneClassSVM
        print(" - fit OneClassSVM ... ", end="")
        clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5)
        clf.fit(train_cover_std)
        print("done.")

        # Plot map of South America
        plt.subplot(1, len(species_bunches), i + 1)
        if basemap:
            print(" - plot coastlines using basemap")
            m = Basemap(
                projection="cyl",
                llcrnrlat=Y.min(),
                urcrnrlat=Y.max(),
                llcrnrlon=X.min(),
                urcrnrlon=X.max(),
                resolution="c",
            )
            m.drawcoastlines()
            m.drawcountries()
        else:
            print(" - plot coastlines from coverage")
            plt.contour(
                X, Y, land_reference, levels=[-9998], colors="k", linestyles="solid"
            )
            plt.xticks([])
            plt.yticks([])

        print(" - predict species distribution")

        # Predict species distribution using the training data
        Z = np.ones((data.Ny, data.Nx), dtype=np.float64)

        # We'll predict only for the land points.
        pred = clf.decision_function((coverages_land - mean) / std)
        # Non-land cells start at the minimum prediction so the contour
        # levels computed below span only the predicted range.
        Z *= pred.min()
        Z[idx[0], idx[1]] = pred

        levels = np.linspace(Z.min(), Z.max(), 25)
        # Cells flagged -9999 in the reference layer get a value below every
        # contour level.
        Z[land_reference == -9999] = -9999

        # plot contours of the prediction
        plt.contourf(X, Y, Z, levels=levels, cmap="Reds")
        plt.colorbar(format="%.2f")

        # scatter training/testing points
        plt.scatter(
            species.pts_train["dd long"],
            species.pts_train["dd lat"],
            s=2**2,
            c="black",
            marker="^",
            label="train",
        )
        plt.scatter(
            species.pts_test["dd long"],
            species.pts_test["dd lat"],
            s=2**2,
            c="black",
            marker="x",
            label="test",
        )
        plt.legend()
        plt.title(species.name)
        plt.axis("equal")

        # Compute AUC with regards to background points
        pred_background = Z[background_points[0], background_points[1]]
        pred_test = clf.decision_function((species.cov_test - mean) / std)
        scores = np.r_[pred_test, pred_background]
        y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)]
        fpr, tpr, thresholds = metrics.roc_curve(y, scores)
        roc_auc = metrics.auc(fpr, tpr)
        plt.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right")
        print("\n Area under the ROC curve : %f" % roc_auc)

    print("\ntime elapsed: %.2fs" % (time() - t0))

    return plt
# Gradio UI: a radio button selects the species (or both) and the resulting
# matplotlib plot is shown in a Plot component.
iface = gr.Interface(
    fn=plot_species_distribution,
    inputs=gr.Radio(
        choices=["Bradypus variegatus", "Microryzomys minutus", "Both"],
        value="Bradypus variegatus",
        label="Species",
    ),
    outputs=gr.Plot(label="Distribution Map"),
    title="Species Distribution Map",
    description="""This app predicts the distribution of a species using a OneClassSVM. Following [this tutorial](https://scikit-learn.org/stable/auto_examples/applications/plot_species_distribution_modeling.html#sphx-glr-auto-examples-applications-plot-species-distribution-modeling-py) from sklearn""",
    examples=[
        ["Bradypus variegatus"],
        ["Microryzomys minutus"],
    ],
)

# Start the server only when this file is run as a script, so importing the
# module (e.g. to reuse the functions above) does not launch the app.
if __name__ == "__main__":
    iface.launch()