Upload 4 files
- app.py +22 -0
- helper.py +103 -0
- object_detection.py +68 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,22 @@
+import streamlit as st
+from PIL import Image
+from object_detection import run_od_pipeline
+
+# Streamlit Page Setup
+st.set_page_config(page_title="🧠 Object Detection App", layout="centered")
+st.title("🧠 Object Detection using Hugging Face")
+
+st.markdown("Upload an image to run object detection:")
+
+# Upload image
+uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
+
+if uploaded_file:
+    image = Image.open(uploaded_file)
+    st.image(image, caption="Uploaded Image", use_column_width=True)
+
+    # Run object detection
+    st.write("Running object detection...")
+    detected_img = run_od_pipeline(image)
+
+    st.image(detected_img, caption="Detected Objects", use_column_width=True)
helper.py
ADDED
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+"""helper.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1IDhEhDLbnCTaBfIbuMtlNFW3ntQiZBwA
+"""
+
+import io
+import matplotlib.pyplot as plt
+import requests
+import inflect
+from PIL import Image
+
+def load_image_from_url(url):
+    return Image.open(requests.get(url, stream=True).raw)
+
+def render_results_in_image(in_pil_img, in_results):
+    plt.figure(figsize=(16, 10))
+    plt.imshow(in_pil_img)
+
+    ax = plt.gca()
+
+    for prediction in in_results:
+
+        x, y = prediction['box']['xmin'], prediction['box']['ymin']
+        w = prediction['box']['xmax'] - prediction['box']['xmin']
+        h = prediction['box']['ymax'] - prediction['box']['ymin']
+
+        ax.add_patch(plt.Rectangle((x, y),
+                                   w,
+                                   h,
+                                   fill=False,
+                                   color="green",
+                                   linewidth=2))
+        ax.text(
+            x,
+            y,
+            f"{prediction['label']}: {round(prediction['score']*100, 1)}%",
+            color='red'
+        )
+
+    plt.axis("off")
+
+    # Save the modified image to a BytesIO object
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, format='png',
+                bbox_inches='tight',
+                pad_inches=0)
+    img_buf.seek(0)
+    modified_image = Image.open(img_buf)
+
+    # Close the plot to prevent it from being displayed
+    plt.close()
+
+    return modified_image
+
+def summarize_predictions_natural_language(predictions):
+    summary = {}
+    p = inflect.engine()
+
+    for prediction in predictions:
+        label = prediction['label']
+        if label in summary:
+            summary[label] += 1
+        else:
+            summary[label] = 1
+
+    result_string = "In this image, there are "
+    for i, (label, count) in enumerate(summary.items()):
+        count_string = p.number_to_words(count)
+        result_string += f"{count_string} {label}"
+        if count > 1:
+            result_string += "s"
+
+        result_string += ", "  # comma-separate items so the rstrip below has a comma to trim
+
+        if i == len(summary) - 2:
+            result_string += "and "
+
+    # Remove the trailing comma and space
+    result_string = result_string.rstrip(', ') + "."
+
+    return result_string
+
+
+##### To ignore warnings #####
+import warnings
+import logging
+from transformers import logging as hf_logging
+
+def ignore_warnings():
+    # Ignore specific Python warnings
+    warnings.filterwarnings("ignore", message="Some weights of the model checkpoint")
+    warnings.filterwarnings("ignore", message="Could not find image processor class")
+    warnings.filterwarnings("ignore", message="The `max_size` parameter is deprecated")
+
+    # Adjust logging for libraries using the logging module
+    logging.basicConfig(level=logging.ERROR)
+    hf_logging.set_verbosity_error()
+
+########
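A minimal usage sketch for the two helpers above; the prediction dicts are hand-written stand-ins shaped like transformers' object-detection pipeline output (label, score, and an xmin/ymin/xmax/ymax box), not real model results:

from helper import summarize_predictions_natural_language

# Hypothetical predictions in the {label, score, box} shape the helpers expect
preds = [
    {"label": "cat", "score": 0.98, "box": {"xmin": 10, "ymin": 20, "xmax": 200, "ymax": 180}},
    {"label": "cat", "score": 0.95, "box": {"xmin": 220, "ymin": 30, "xmax": 400, "ymax": 190}},
    {"label": "dog", "score": 0.91, "box": {"xmin": 50, "ymin": 200, "xmax": 300, "ymax": 380}},
]

print(summarize_predictions_natural_language(preds))
# -> "In this image, there are two cats, and one dog."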
object_detection.py
ADDED
@@ -0,0 +1,68 @@
+import tempfile
+import subprocess
+import sys
+
+def ensure_package_installed(package_name):
+    try:
+        __import__(package_name)
+    except ImportError:
+        print(f"{package_name} package not found. Installing...")
+        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+        __import__(package_name)
+
+# Check and install required packages before importing them
+ensure_package_installed("gradio")
+ensure_package_installed("transformers")
+ensure_package_installed("gtts")
+
+import torch
+import gradio
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from gtts import gTTS
+
+# Load the image captioning model
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+def generate_description(image):
+    """Generates a textual description of the given image using a pre-trained BLIP model."""
+    inputs = processor(image, return_tensors="pt").to(model.device)
+    output = model.generate(**inputs)
+    description = processor.decode(output[0], skip_special_tokens=True)
+    return description
+
+def text_to_speech(text):
+    """Converts text to speech using gTTS and returns the audio file path."""
+    tts = gTTS(text=text, lang='en')
+    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+    tts.save(temp_audio.name)
+    return temp_audio.name
+
+def process_image(image):
+    """Processes the uploaded image and returns the generated description."""
+    description = generate_description(image)
+    return description
+
+def get_audio(description):
+    """Generates the audio file for the given description."""
+    return text_to_speech(description)
+
+# Build Gradio Interface
+with gradio.Blocks() as demo:
+    gradio.Markdown("# Image Description and Audio Transcript App")
+    gradio.Markdown("Upload an image to get an AI-generated description. Click the button to hear the description.")
+
+    with gradio.Row():
+        image_input = gradio.Image(type="pil")
+        text_output = gradio.Textbox(label="Generated Description")
+
+    generate_btn = gradio.Button("Generate Description")
+    audio_btn = gradio.Button("Click here for an audio transcript")
+    audio_output = gradio.Audio()
+
+    generate_btn.click(process_image, inputs=[image_input], outputs=[text_output])
+    audio_btn.click(get_audio, inputs=[text_output], outputs=[audio_output])
+
+# Launch the Gradio app only when run directly, so importing this module does not block
+if __name__ == "__main__":
+    demo.launch()
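Note: app.py imports run_od_pipeline from this module, but the file as committed never defines it; it builds a standalone BLIP captioning demo instead. A minimal sketch of the missing function, assuming transformers' object-detection pipeline with facebook/detr-resnet-50 as a stand-in checkpoint and reusing render_results_in_image from helper.py:

from transformers import pipeline
from helper import render_results_in_image

# Assumption: any transformers object-detection checkpoint would work here;
# DETR is a stand-in, not a choice confirmed by this commit.
od_pipe = pipeline("object-detection", model="facebook/detr-resnet-50")

def run_od_pipeline(pil_image):
    """Detect objects in a PIL image and return an annotated PIL image."""
    results = od_pipe(pil_image)  # list of {label, score, box} dicts
    return render_results_in_image(pil_image, results)

Loading the pipeline at module level keeps the model warm across Streamlit reruns, since app.py imports this module only once per process.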
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+torch
+transformers
+Pillow
+matplotlib
+inflect
+gTTS
+gradio
+requests
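Note: app.py also depends on streamlit, which is not listed here. A Hugging Face Space created with the Streamlit SDK provides it, but a local run would need pip install streamlit as well.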