Spaces:

Samhita
/

geolocator

Runtime error

App Files Files Community

Samhita committed on Oct 12, 2022

Commit

3cc543c

1 Parent(s): 889cf23

add gantry code

Browse files

Signed-off-by: Samhita Alla <aallasamhita@gmail.com>

Files changed (5) hide show

app.py +76 -27
gantry_callback/__init__.py +0 -0
gantry_callback/gantry_util.py +148 -0
gantry_callback/s3_util.py +166 -0
gantry_callback/string_img_util.py +27 -0

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import json
 import mimetypes
 import os
-from typing import Tuple
 import gradio as gr
 import pandas as pd
@@ -9,10 +9,17 @@ import plotly
 import plotly.express as px
 import requests
 from dotenv import load_dotenv
 load_dotenv()
 URL = os.getenv("ENDPOINT")
 def get_plotly_graph(
@@ -21,7 +28,7 @@ def get_plotly_graph(
     lat_long_data = [[latitude, longitude, location]]
     map_df = pd.DataFrame(lat_long_data, columns=["latitude", "longitude", "location"])
-    px.set_mapbox_access_token(os.getenv("MAPBOX_TOKEN"))
     fig = px.scatter_mapbox(
         map_df,
         lat="latitude",
@@ -36,7 +43,29 @@ def get_plotly_graph(
     return fig
-def image_gradio(img_file: str) -> Tuple[str, plotly.graph_objects.Figure]:
     data = json.loads(
         requests.post(
             f"{URL}predict-image",
@@ -50,13 +79,10 @@ def image_gradio(img_file: str) -> Tuple[str, plotly.graph_objects.Figure]:
         ).text
     )
-    location = data["location"]
-    return data["location"], get_plotly_graph(
-        latitude=data["latitude"], longitude=data["longitude"], location=location
-    )
-def video_gradio(video_file: str) -> Tuple[str, plotly.graph_objects.Figure]:
     data = json.loads(
         requests.post(
             f"{URL}predict-video",
@@ -70,13 +96,10 @@ def video_gradio(video_file: str) -> Tuple[str, plotly.graph_objects.Figure]:
         ).text
     )
-    location = data["location"]
-    return location, get_plotly_graph(
-        latitude=data["latitude"], longitude=data["longitude"], location=location
-    )
-def url_gradio(url: str) -> Tuple[str, plotly.graph_objects.Figure]:
     data = json.loads(
         requests.post(
             f"{URL}predict-url",
@@ -85,32 +108,31 @@ def url_gradio(url: str) -> Tuple[str, plotly.graph_objects.Figure]:
         ).text
     )
-    location = data["location"]
-    return location, get_plotly_graph(
-        latitude=data["latitude"], longitude=data["longitude"], location=location
-    )
 with gr.Blocks() as demo:
     gr.Markdown("# GeoLocator")
     gr.Markdown(
-        "## An app that guesses the location of an image 🌌, a video 📹 or a YouTube link 🔗."
-    )
-    gr.Markdown(
-        "Find the code powering this application [here](https://github.com/samhita-alla/geolocator)."
     )
     with gr.Tab("Image"):
         with gr.Row():
-            img_input = gr.Image(type="filepath", label="im")
             with gr.Column():
                 img_text_output = gr.Textbox(label="Location")
                 img_plot = gr.Plot()
         img_text_button = gr.Button("Go locate!")
     with gr.Tab("Video"):
         with gr.Row():
-            video_input = gr.Video(type="filepath", label="video")
             with gr.Column():
                 video_text_output = gr.Textbox(label="Location")
                 video_plot = gr.Plot()
         video_text_button = gr.Button("Go locate!")
     with gr.Tab("YouTube Link"):
@@ -118,19 +140,46 @@ with gr.Blocks() as demo:
             url_input = gr.Textbox(label="YouTube video link")
             with gr.Column():
                 url_text_output = gr.Textbox(label="Location")
                 url_plot = gr.Plot()
         url_text_button = gr.Button("Go locate!")
     img_text_button.click(
-        image_gradio, inputs=img_input, outputs=[img_text_output, img_plot]
     )
     video_text_button.click(
-        video_gradio, inputs=video_input, outputs=[video_text_output, video_plot]
     )
     url_text_button.click(
-        url_gradio, inputs=url_input, outputs=[url_text_output, url_plot]
     )
-    examples = gr.Examples(".", inputs=[img_input, url_input])
 demo.launch()

 import json
 import mimetypes
 import os
+from typing import Dict, Tuple, Union
 import gradio as gr
 import pandas as pd
 import plotly.express as px
 import requests
 from dotenv import load_dotenv
+from gantry_callback.gantry_util import GantryImageToTextLogger
+from gantry_callback.s3_util import make_unique_bucket_name
 load_dotenv()
 URL = os.getenv("ENDPOINT")
+# Settings read from the environment; os.getenv yields None for any variable
+# that is not set.
+# Gantry application name: handed to the flagging callback and used as the
+# bucket-name prefix further down in this file.
+GANTRY_APP_NAME = os.getenv("GANTRY_APP_NAME")
+# Gantry API key handed to the flagging callback.
+GANTRY_KEY = os.getenv("GANTRY_API_KEY")
+# NOTE(review): AWS_KEY / AWS_SECRET_KEY are not referenced in the visible part
+# of this file -- confirm whether they are still needed.
+AWS_KEY = os.getenv("AWS_KEY")
+AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")
+# Token passed to px.set_mapbox_access_token in get_plotly_graph.
+MAPBOX_TOKEN = os.getenv("MAPBOX_TOKEN")
 def get_plotly_graph(
     lat_long_data = [[latitude, longitude, location]]
     map_df = pd.DataFrame(lat_long_data, columns=["latitude", "longitude", "location"])
+    px.set_mapbox_access_token(MAPBOX_TOKEN)
     fig = px.scatter_mapbox(
         map_df,
         lat="latitude",
     return fig
+def gradio_error():
+    raise gr.Error("Unable to detect the location!")
+def get_outputs(
+    data: Dict[str, Union[str, float, None]]
+) -> Tuple[str, str, plotly.graph_objects.Figure]:
+    location, latitude, longitude = (
+        data["location"],
+        data["latitude"],
+        data["longitude"],
+    )
+    if location is None:
+        gradio_error()
+    return (
+        data["location"],
+        f"{latitude},{longitude}",
+        get_plotly_graph(latitude=latitude, longitude=longitude, location=location),
+    )
+def image_gradio(img_file: str) -> Tuple[str, str, plotly.graph_objects.Figure]:
     data = json.loads(
         requests.post(
             f"{URL}predict-image",
         ).text
     )
+    return get_outputs(data=data)
+def video_gradio(video_file: str) -> Tuple[str, str, plotly.graph_objects.Figure]:
     data = json.loads(
         requests.post(
             f"{URL}predict-video",
         ).text
     )
+    return get_outputs(data=data)
+def url_gradio(url: str) -> Tuple[str, str, plotly.graph_objects.Figure]:
     data = json.loads(
         requests.post(
             f"{URL}predict-url",
         ).text
     )
+    return get_outputs(data=data)
 with gr.Blocks() as demo:
     gr.Markdown("# GeoLocator")
     gr.Markdown(
+        "### An app that guesses the location of an image 🌌, a video 📹 or a YouTube link 🔗."
     )
     with gr.Tab("Image"):
         with gr.Row():
+            img_input = gr.Image(type="filepath", label="Image")
             with gr.Column():
                 img_text_output = gr.Textbox(label="Location")
+                img_coordinates = gr.Textbox(label="Coordinates")
                 img_plot = gr.Plot()
         img_text_button = gr.Button("Go locate!")
+        with gr.Row():
+            # Flag button
+            img_flag_button = gr.Button("Flag this output")
     with gr.Tab("Video"):
         with gr.Row():
+            video_input = gr.Video(type="filepath", label="Video")
             with gr.Column():
                 video_text_output = gr.Textbox(label="Location")
+                video_coordinates = gr.Textbox(label="Coordinates")
                 video_plot = gr.Plot()
         video_text_button = gr.Button("Go locate!")
     with gr.Tab("YouTube Link"):
             url_input = gr.Textbox(label="YouTube video link")
             with gr.Column():
                 url_text_output = gr.Textbox(label="Location")
+                url_coordinates = gr.Textbox(label="Coordinates")
                 url_plot = gr.Plot()
         url_text_button = gr.Button("Go locate!")
+    # Gantry flagging for image #
+    callback = GantryImageToTextLogger(application=GANTRY_APP_NAME, api_key=GANTRY_KEY)
+    callback.setup(
+        components=[img_input, img_text_output],
+        flagging_dir=make_unique_bucket_name(prefix=GANTRY_APP_NAME, seed="420"),
+    )
+    img_flag_button.click(
+        fn=lambda *args: callback.flag(args),
+        inputs=[img_input, img_text_output, img_coordinates],
+        outputs=None,
+        preprocess=False,
+    )
+    ###################
     img_text_button.click(
+        image_gradio,
+        inputs=img_input,
+        outputs=[img_text_output, img_coordinates, img_plot],
     )
     video_text_button.click(
+        video_gradio,
+        inputs=video_input,
+        outputs=[video_text_output, video_coordinates, video_plot],
     )
     url_text_button.click(
+        url_gradio,
+        inputs=url_input,
+        outputs=[url_text_output, url_coordinates, url_plot],
     )
+    examples = gr.Examples(".", inputs=[img_input, video_input, url_input])
+    gr.Markdown(
+        "Check out the [GitHub repository](https://github.com/samhita-alla/geolocator) that this demo is based off of."
+    )
 demo.launch()

gantry_callback/__init__.py ADDED Viewed

File without changes

gantry_callback/gantry_util.py ADDED Viewed

	@@ -0,0 +1,148 @@

+"""
+Class to handle flagging in Gradio to Gantry.
+Originally written by the FSDL educators at https://github.com/full-stack-deep-learning/fsdl-text-recognizer-2022/blob/main/app_gradio/flagging.py
+and adjusted for the geolocator project.
+"""
+import os
+from typing import List, Optional, Union
+import gantry
+import gradio as gr
+from gradio.components import Component
+from smart_open import open
+from .s3_util import (
+    add_access_policy,
+    enable_bucket_versioning,
+    get_or_create_bucket,
+    get_uri_of,
+    make_key,
+)
+from .string_img_util import read_b64_string
+class GantryImageToTextLogger(gr.FlaggingCallback):
+    """
+    A FlaggingCallback that logs flagged image-to-text data to Gantry via S3.
+    """
+    def __init__(
+        self,
+        application: str,
+        version: Union[int, str, None] = None,
+        api_key: Optional[str] = None,
+    ):
+        """Logs image-to-text data that was flagged in Gradio to Gantry.
+        Images are logged to Amazon Web Services' Simple Storage Service (S3).
+        The flagging_dir provided to the Gradio interface is used to set the
+        name of the bucket on S3 into which images are logged.
+        See the following tutorial by Dan Bader for a quick overview of S3 and the AWS SDK
+        for Python, boto3: https://realpython.com/python-boto3-aws-s3/
+        See https://gradio.app/docs/#flagging for details on how
+        flagging data is handled by Gradio.
+        See https://docs.gantry.io for information about logging data to Gantry.
+        Parameters
+        ----------
+        application
+            The name of the application on Gantry to which flagged data should be uploaded.
+            Gantry validates and monitors data per application.
+        version
+            The schema version to use during validation by Gantry. If not provided, Gantry
+            will use the latest version. A new version will be created if the provided version
+            does not exist yet.
+        api_key
+            Optionally, provide your Gantry API key here. Provided for convenience
+            when testing and developing locally or in notebooks. The API key can
+            alternatively be provided via the GANTRY_API_KEY environment variable.
+        """
+        self.application = application
+        self.version = version
+        gantry.init(api_key=api_key)
+    def setup(self, components: List[Component], flagging_dir: str):
+        """Sets up the GantryImageToTextLogger by creating or attaching to an S3 Bucket."""
+        self._counter = 0
+        self.bucket = get_or_create_bucket(flagging_dir)
+        enable_bucket_versioning(self.bucket)
+        add_access_policy(self.bucket)
+        (
+            self.image_component_idx,
+            self.text_component_idx,
+            self.text_component2_idx,
+        ) = self._find_image_video_and_text_components(components)
+    def flag(self, flag_data, flag_option=None, flag_index=None, username=None) -> int:
+        """Sends flagged outputs and feedback to Gantry and image inputs to S3."""
+        image = flag_data[self.image_component_idx]
+        text = flag_data[self.text_component_idx]
+        text2 = flag_data[self.text_component2_idx]
+        feedback = {"flag": flag_option}
+        if username is not None:
+            feedback["user"] = username
+        data_type, image_buffer = read_b64_string(image, return_data_type=True)
+        image_url = self._to_s3(image_buffer.read(), filetype=data_type)
+        self._to_gantry(
+            input_image_url=image_url,
+            pred_location=text,
+            pred_coordinates=text2,
+            feedback=feedback,
+        )
+        self._counter += 1
+        return self._counter
+    def _to_gantry(self, input_image_url, pred_location, pred_coordinates, feedback):
+        inputs = {"image": input_image_url}
+        outputs = {"location": pred_location, "coordinates": pred_coordinates}
+        gantry.log_record(
+            self.application,
+            self.version,
+            inputs=inputs,
+            outputs=outputs,
+            feedback=feedback,
+        )
+    def _to_s3(self, image_bytes, key=None, filetype=None):
+        if key is None:
+            key = make_key(image_bytes, filetype=filetype)
+        s3_uri = get_uri_of(self.bucket, key)
+        with open(s3_uri, "wb") as s3_object:
+            s3_object.write(image_bytes)
+        return s3_uri
+    def _find_image_video_and_text_components(self, components: List[Component]):
+        """
+        Manual indexing of images and text components
+        """
+        image_component_idx = 0
+        text_component_idx = 1
+        text_component2_idx = 2
+        return (
+            image_component_idx,
+            text_component_idx,
+            text_component2_idx,
+        )
+def get_api_key() -> Optional[str]:
+    """Convenience method for fetching the Gantry API key."""
+    api_key = os.environ.get("GANTRY_API_KEY")
+    return api_key

gantry_callback/s3_util.py ADDED Viewed

	@@ -0,0 +1,166 @@

+"""
+Utility that uses boto to create buckets.
+This work is not our own; it was written entirely by https://github.com/full-stack-deep-learning.
+"""
+import hashlib
+import json
+import boto3
+import botocore
+# Template for the HTTPS URL of a bucket or object; filled with bucket, region and key.
+S3_URL_FORMAT = "https://{bucket}.s3.{region}.amazonaws.com/{key}"
+# Template for the s3:// URI of a bucket or object; filled with bucket and key.
+S3_URI_FORMAT = "s3://{bucket}/{key}"
+# boto3 S3 resource created once at import time and shared by the helpers in this module.
+s3 = boto3.resource("s3")
+def get_or_create_bucket(name):
+    """Gets an S3 bucket with boto3 or creates it if it doesn't exist."""
+    try:  # try to create a bucket
+        name, response = _create_bucket(name)
+    except botocore.exceptions.ClientError as err:
+        # error handling from https://github.com/boto/boto3/issues/1195#issuecomment-495842252
+        status = err.response["ResponseMetadata"][
+            "HTTPStatusCode"
+        ]  # status codes identify particular errors
+        if status == 409:  # if the bucket exists already,
+            pass  # we don't need to make it -- we presume we have the right permissions
+        else:
+            raise err
+    bucket = s3.Bucket(name)
+    return bucket
+def _create_bucket(name):
+    """Creates a bucket with the provided name."""
+    session = boto3.session.Session()  # sessions hold on to credentials and config
+    current_region = session.region_name  # so we can pull the default region
+    bucket_config = {"LocationConstraint": current_region}  # and apply it to the bucket
+    bucket_response = s3.create_bucket(
+        Bucket=name, CreateBucketConfiguration=bucket_config
+    )
+    return name, bucket_response
+def make_key(fileobj, filetype=None):
+    """Creates a unique key for the fileobj and optionally append the filetype."""
+    identifier = make_identifier(fileobj)
+    if filetype is None:
+        return identifier
+    else:
+        return identifier + "." + filetype
+def make_unique_bucket_name(prefix, seed):
+    """Creates a unique bucket name from a prefix and a seed."""
+    name = hashlib.sha256(seed.encode("utf-8")).hexdigest()[:10]
+    return prefix + "-" + name
+def get_url_of(bucket, key=None):
+    """Returns the url of a bucket and optionally of an object in that bucket."""
+    if not isinstance(bucket, str):
+        bucket = bucket.name
+    region = _get_region(bucket)
+    key = key or ""
+    url = _format_url(bucket, region, key)
+    return url
+def get_uri_of(bucket, key=None):
+    """Returns the s3:// uri of a bucket and optionally of an object in that bucket."""
+    if not isinstance(bucket, str):
+        bucket = bucket.name
+    key = key or ""
+    uri = _format_uri(bucket, key)
+    return uri
+def enable_bucket_versioning(bucket):
+    """Turns on versioning for bucket contents, which avoids deletion."""
+    if not isinstance(bucket, str):
+        bucket = bucket.name
+    bucket_versioning = s3.BucketVersioning(bucket)
+    return bucket_versioning.enable()
+def add_access_policy(bucket):
+    """Adds a policy to our bucket that allows the Gantry app to access data."""
+    access_policy = json.dumps(_get_policy(bucket.name))
+    s3.meta.client.put_bucket_policy(Bucket=bucket.name, Policy=access_policy)
+def _get_policy(bucket_name):
+    """Returns a bucket policy allowing Gantry app access as a JSON-compatible dictionary."""
+    return {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Effect": "Allow",
+                "Principal": {
+                    "AWS": [
+                        "arn:aws:iam::848836713690:root",
+                        "arn:aws:iam::339325199688:root",
+                        "arn:aws:iam::665957668247:root",
+                    ]
+                },
+                "Action": ["s3:GetObject", "s3:GetObjectVersion"],
+                "Resource": f"arn:aws:s3:::{bucket_name}/*",
+            },
+            {
+                "Effect": "Allow",
+                "Principal": {
+                    "AWS": [
+                        "arn:aws:iam::848836713690:root",
+                        "arn:aws:iam::339325199688:root",
+                        "arn:aws:iam::665957668247:root",
+                    ]
+                },
+                "Action": "s3:ListBucketVersions",
+                "Resource": f"arn:aws:s3:::{bucket_name}",
+            },
+        ],
+    }
+def make_identifier(byte_data):
+    """Create a unique identifier for a collection of bytes via hashing."""
+    # feed them to hashing algo -- security is not critical here, so we use SHA-1
+    hashed_data = hashlib.sha1(byte_data)  # noqa: S3
+    identifier = hashed_data.hexdigest()  # turn it into hexdecimal
+    return identifier
+def _get_region(bucket):
+    """Determine the region of an s3 bucket."""
+    if not isinstance(bucket, str):
+        bucket = bucket.name
+    s3_client = boto3.client("s3")
+    bucket_location_response = s3_client.get_bucket_location(Bucket=bucket)
+    bucket_location = bucket_location_response["LocationConstraint"]
+    return bucket_location
+def _format_url(bucket_name, region, key=None):
+    key = key or ""
+    url = S3_URL_FORMAT.format(bucket=bucket_name, region=region, key=key)
+    return url
+def _format_uri(bucket_name, key=None):
+    key = key or ""
+    uri = S3_URI_FORMAT.format(bucket=bucket_name, key=key)
+    return uri

gantry_callback/string_img_util.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import base64
+from io import BytesIO
+def read_b64_string(b64_string, return_data_type=False):
+    """Read a base64-encoded string into an in-memory file-like object."""
+    data_header, b64_data = split_and_validate_b64_string(b64_string)
+    b64_buffer = BytesIO(base64.b64decode(b64_data))
+    if return_data_type:
+        return get_b64_filetype(data_header), b64_buffer
+    else:
+        return b64_buffer
+def get_b64_filetype(data_header):
+    """Retrieves the filetype information from the data type header of a base64-encoded object."""
+    _, file_type = data_header.split("/")
+    return file_type
+def split_and_validate_b64_string(b64_string):
+    """Return the data_type and data of a b64 string, with validation."""
+    header, data = b64_string.split(",", 1)
+    assert header.startswith("data:")
+    assert header.endswith(";base64")
+    data_type = header.split(";")[0].split(":")[1]
+    return data_type, data