Upload 4 files
- app.py +22 -0
- helper.py +103 -0
- object_detection.py +68 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,22 @@
+import streamlit as st
+from PIL import Image
+from object_detection import run_od_pipeline
+
+# Streamlit Page Setup
+st.set_page_config(page_title="🧠 Object Detection App", layout="centered")
+st.title("🧠 Object Detection using Hugging Face")
+
+st.markdown("Upload an image to run object detection:")
+
+# Upload image
+uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
+
+if uploaded_file:
+    image = Image.open(uploaded_file)
+    st.image(image, caption="Uploaded Image", use_column_width=True)
+
+    # Run object detection
+    st.write("Running object detection...")
+    detected_img = run_od_pipeline(image)
+
+    st.image(detected_img, caption="Detected Objects", use_column_width=True)
helper.py
ADDED
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+"""helper.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1IDhEhDLbnCTaBfIbuMtlNFW3ntQiZBwA
+"""
+
+import io
+import matplotlib.pyplot as plt
+import requests
+import inflect
+from PIL import Image
+
+def load_image_from_url(url):
+    return Image.open(requests.get(url, stream=True).raw)
+
+def render_results_in_image(in_pil_img, in_results):
+    plt.figure(figsize=(16, 10))
+    plt.imshow(in_pil_img)
+
+    ax = plt.gca()
+
+    for prediction in in_results:
+
+        x, y = prediction['box']['xmin'], prediction['box']['ymin']
+        w = prediction['box']['xmax'] - prediction['box']['xmin']
+        h = prediction['box']['ymax'] - prediction['box']['ymin']
+
+        ax.add_patch(plt.Rectangle((x, y),
+                                   w,
+                                   h,
+                                   fill=False,
+                                   color="green",
+                                   linewidth=2))
+        ax.text(
+            x,
+            y,
+            f"{prediction['label']}: {round(prediction['score']*100, 1)}%",
+            color='red'
+        )
+
+    plt.axis("off")
+
+    # Save the modified image to a BytesIO object
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, format='png',
+                bbox_inches='tight',
+                pad_inches=0)
+    img_buf.seek(0)
+    modified_image = Image.open(img_buf)
+
+    # Close the plot to prevent it from being displayed
+    plt.close()
+
+    return modified_image
+
+def summarize_predictions_natural_language(predictions):
+    summary = {}
+    p = inflect.engine()
+
+    for prediction in predictions:
+        label = prediction['label']
+        if label in summary:
+            summary[label] += 1
+        else:
+            summary[label] = 1
+
+    result_string = "In this image, there are "
+    for i, (label, count) in enumerate(summary.items()):
+        count_string = p.number_to_words(count)
+        result_string += f"{count_string} {label}"
+        if count > 1:
+            result_string += "s"
+
+        result_string += ", "  # comma-separate items so the rstrip below has a comma to trim
+
+        if i == len(summary) - 2:
+            result_string += "and "
+
+    # Remove the trailing comma and space
+    result_string = result_string.rstrip(', ') + "."
+
+    return result_string
+
+
+##### To ignore warnings #####
+import warnings
+import logging
+from transformers import logging as hf_logging
+
+def ignore_warnings():
+    # Ignore specific Python warnings
+    warnings.filterwarnings("ignore", message="Some weights of the model checkpoint")
+    warnings.filterwarnings("ignore", message="Could not find image processor class")
+    warnings.filterwarnings("ignore", message="The `max_size` parameter is deprecated")
+
+    # Adjust logging for libraries using the logging module
+    logging.basicConfig(level=logging.ERROR)
+    hf_logging.set_verbosity_error()
+
+########
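A minimal usage sketch for the two helpers above; the prediction dicts are hand-written stand-ins shaped like transformers' object-detection pipeline output (label, score, and an xmin/ymin/xmax/ymax box), not real model results:

from helper import summarize_predictions_natural_language

# Hypothetical predictions in the {label, score, box} shape the helpers expect
preds = [
    {"label": "cat", "score": 0.98, "box": {"xmin": 10, "ymin": 20, "xmax": 200, "ymax": 180}},
    {"label": "cat", "score": 0.95, "box": {"xmin": 220, "ymin": 30, "xmax": 400, "ymax": 190}},
    {"label": "dog", "score": 0.91, "box": {"xmin": 50, "ymin": 200, "xmax": 300, "ymax": 380}},
]

print(summarize_predictions_natural_language(preds))
# -> "In this image, there are two cats, and one dog."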
object_detection.py
ADDED
@@ -0,0 +1,68 @@
+import tempfile
+import subprocess
+import sys
+
+def ensure_package_installed(package_name):
+    try:
+        __import__(package_name)
+    except ImportError:
+        print(f"{package_name} package not found. Installing...")
+        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+        __import__(package_name)
+
+# Check and install required packages before importing them
+ensure_package_installed("gradio")
+ensure_package_installed("transformers")
+ensure_package_installed("gtts")
+
+import torch
+import gradio
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from gtts import gTTS
+
+# Load the image captioning model
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+def generate_description(image):
+    """Generates a textual description of the given image using a pre-trained BLIP model."""
+    inputs = processor(image, return_tensors="pt").to(model.device)
+    output = model.generate(**inputs)
+    description = processor.decode(output[0], skip_special_tokens=True)
+    return description
+
+def text_to_speech(text):
+    """Converts text to speech using gTTS and returns the audio file path."""
+    tts = gTTS(text=text, lang='en')
+    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+    tts.save(temp_audio.name)
+    return temp_audio.name
+
+def process_image(image):
+    """Processes the uploaded image and returns the generated description."""
+    description = generate_description(image)
+    return description
+
+def get_audio(description):
+    """Generates the audio file for the given description."""
+    return text_to_speech(description)
+
+# Build Gradio Interface
+with gradio.Blocks() as demo:
+    gradio.Markdown("# Image Description and Audio Transcript App")
+    gradio.Markdown("Upload an image to get an AI-generated description. Click the button to hear the description.")
+
+    with gradio.Row():
+        image_input = gradio.Image(type="pil")
+        text_output = gradio.Textbox(label="Generated Description")
+
+    generate_btn = gradio.Button("Generate Description")
+    audio_btn = gradio.Button("Click here for an audio transcript")
+    audio_output = gradio.Audio()
+
+    generate_btn.click(process_image, inputs=[image_input], outputs=[text_output])
+    audio_btn.click(get_audio, inputs=[text_output], outputs=[audio_output])
+
+# Launch the Gradio app only when run directly, so importing this module does not block
+if __name__ == "__main__":
+    demo.launch()
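Note: app.py imports run_od_pipeline from this module, but the file as committed never defines it; it builds a standalone BLIP captioning demo instead. A minimal sketch of the missing function, assuming transformers' object-detection pipeline with facebook/detr-resnet-50 as a stand-in checkpoint and reusing render_results_in_image from helper.py:

from transformers import pipeline
from helper import render_results_in_image

# Assumption: any transformers object-detection checkpoint would work here;
# DETR is a stand-in, not a choice confirmed by this commit.
od_pipe = pipeline("object-detection", model="facebook/detr-resnet-50")

def run_od_pipeline(pil_image):
    """Detect objects in a PIL image and return an annotated PIL image."""
    results = od_pipe(pil_image)  # list of {label, score, box} dicts
    return render_results_in_image(pil_image, results)

Loading the pipeline at module level keeps the model warm across Streamlit reruns, since app.py imports this module only once per process.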
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+torch
+transformers
+Pillow
+matplotlib
+inflect
+gTTS
+gradio
+requests
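Note: app.py also depends on streamlit, which is not listed here. A Hugging Face Space created with the Streamlit SDK provides it, but a local run would need pip install streamlit as well.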