Prince9191 committed
Commit 2183303 · verified · 1 Parent(s): 96d7fb7

Upload 4 files

Files changed (4):
  1. app.py +22 -0
  2. helper.py +103 -0
  3. object_detection.py +68 -0
  4. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,22 @@
+ import streamlit as st
+ from PIL import Image
+ from object_detection import run_od_pipeline
+
+ # Streamlit Page Setup
+ st.set_page_config(page_title="🧠 Object Detection App", layout="centered")
+ st.title("🧠 Object Detection using Hugging Face")
+
+ st.markdown("Upload an image to run object detection:")
+
+ # Upload image
+ uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
+
+ if uploaded_file:
+     image = Image.open(uploaded_file)
+     st.image(image, caption="Uploaded Image", use_column_width=True)
+
+     # Run object detection
+     st.write("Running object detection...")
+     detected_img = run_od_pipeline(image)
+
+     st.image(detected_img, caption="Detected Objects", use_column_width=True)
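
Note: app.py imports run_od_pipeline from object_detection, but the object_detection.py added in this commit builds a BLIP image-captioning Gradio app and does not define that function (streamlit is likewise not listed in requirements.txt, although app.py needs it). Below is a minimal sketch of what run_od_pipeline could look like, built on the transformers object-detection pipeline and the render_results_in_image helper from helper.py in this commit; the "facebook/detr-resnet-50" checkpoint is an assumption, not something these files specify.

# Hypothetical run_od_pipeline sketch -- not part of this commit.
# The "facebook/detr-resnet-50" checkpoint is an assumed default.
from transformers import pipeline
from helper import ignore_warnings, render_results_in_image

ignore_warnings()

# Load a generic object-detection pipeline once, at import time.
od_pipe = pipeline("object-detection", model="facebook/detr-resnet-50")

def run_od_pipeline(image):
    """Run object detection on a PIL image and return an annotated PIL image."""
    predictions = od_pipe(image)  # list of {'score', 'label', 'box'} dicts
    return render_results_in_image(image, predictions)
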
helper.py ADDED
@@ -0,0 +1,103 @@
+ # -*- coding: utf-8 -*-
+ """helper.ipynb
+
+ Automatically generated by Colaboratory.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1IDhEhDLbnCTaBfIbuMtlNFW3ntQiZBwA
+ """
+
+ import io
+ import matplotlib.pyplot as plt
+ import requests
+ import inflect
+ from PIL import Image
+
+ def load_image_from_url(url):
+     return Image.open(requests.get(url, stream=True).raw)
+
+ def render_results_in_image(in_pil_img, in_results):
+     plt.figure(figsize=(16, 10))
+     plt.imshow(in_pil_img)
+
+     ax = plt.gca()
+
+     for prediction in in_results:
+
+         x, y = prediction['box']['xmin'], prediction['box']['ymin']
+         w = prediction['box']['xmax'] - prediction['box']['xmin']
+         h = prediction['box']['ymax'] - prediction['box']['ymin']
+
+         ax.add_patch(plt.Rectangle((x, y),
+                                    w,
+                                    h,
+                                    fill=False,
+                                    color="green",
+                                    linewidth=2))
+         ax.text(
+             x,
+             y,
+             f"{prediction['label']}: {round(prediction['score']*100, 1)}%",
+             color='red'
+         )
+
+     plt.axis("off")
+
+     # Save the modified image to a BytesIO object
+     img_buf = io.BytesIO()
+     plt.savefig(img_buf, format='png',
+                 bbox_inches='tight',
+                 pad_inches=0)
+     img_buf.seek(0)
+     modified_image = Image.open(img_buf)
+
+     # Close the plot to prevent it from being displayed
+     plt.close()
+
+     return modified_image
+
+ def summarize_predictions_natural_language(predictions):
+     summary = {}
+     p = inflect.engine()
+
+     for prediction in predictions:
+         label = prediction['label']
+         if label in summary:
+             summary[label] += 1
+         else:
+             summary[label] = 1
+
+     result_string = "In this image, there are "
+     for i, (label, count) in enumerate(summary.items()):
+         count_string = p.number_to_words(count)
+         result_string += f"{count_string} {label}"
+         if count > 1:
+             result_string += "s"
+
+         result_string += " "
+
+         if i == len(summary) - 2:
+             result_string += "and "
+
+     # Remove the trailing comma and space
+     result_string = result_string.rstrip(', ') + "."
+
+     return result_string
+
+
+ ##### To ignore warnings #####
+ import warnings
+ import logging
+ from transformers import logging as hf_logging
+
+ def ignore_warnings():
+     # Ignore specific Python warnings
+     warnings.filterwarnings("ignore", message="Some weights of the model checkpoint")
+     warnings.filterwarnings("ignore", message="Could not find image processor class")
+     warnings.filterwarnings("ignore", message="The `max_size` parameter is deprecated")
+
+     # Adjust logging for libraries using the logging module
+     logging.basicConfig(level=logging.ERROR)
+     hf_logging.set_verbosity_error()
+
+ ########
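
A quick usage sketch for the two main helpers above. The predictions list mirrors the dict layout the functions index into ('score', 'label', and a 'box' with xmin/ymin/xmax/ymax, as produced by a transformers object-detection pipeline); the values and the "example.jpg" path are made up for illustration.

from PIL import Image
from helper import render_results_in_image, summarize_predictions_natural_language

# Made-up predictions in the expected format; a real list would come from an
# object-detection pipeline.
predictions = [
    {"score": 0.98, "label": "dog",    "box": {"xmin": 50,  "ymin": 40, "xmax": 300, "ymax": 280}},
    {"score": 0.91, "label": "person", "box": {"xmin": 320, "ymin": 20, "xmax": 480, "ymax": 350}},
]

image = Image.open("example.jpg")  # any local test image
annotated = render_results_in_image(image, predictions)
annotated.save("example_annotated.png")

print(summarize_predictions_natural_language(predictions))
# -> "In this image, there are one dog and one person."
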
object_detection.py ADDED
@@ -0,0 +1,68 @@
+ import torch
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ from gtts import gTTS
+ import tempfile
+ import subprocess
+ import sys
+ import gradio
+
+
+ def ensure_package_installed(package_name):
+     try:
+         __import__(package_name)
+     except ImportError:
+         print(f"{package_name} package not found. Installing...")
+         subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+         __import__(package_name)
+
+ # Check and install required packages
+ ensure_package_installed("gradio")
+ ensure_package_installed("transformers")
+ ensure_package_installed("gtts")
+
+
+ # Load the image captioning model
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+ def generate_description(image):
+     """Generates a textual description of the given image using a pre-trained BLIP model."""
+     inputs = processor(image, return_tensors="pt").to(model.device)
+     output = model.generate(**inputs)
+     description = processor.decode(output[0], skip_special_tokens=True)
+     return description
+
+ def text_to_speech(text):
+     """Converts text to speech using gTTS and returns the audio file path."""
+     tts = gTTS(text=text, lang='en')
+     temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+     tts.save(temp_audio.name)
+     return temp_audio.name
+
+ def process_image(image):
+     """Processes the uploaded image and returns the generated description."""
+     description = generate_description(image)
+     return description
+
+ def get_audio(description):
+     """Generates the audio file for the given description."""
+     return text_to_speech(description)
+
+ # Build Gradio Interface
+ with gradio.Blocks() as demo:
+     gradio.Markdown("# Image Description and Audio Transcript App")
+     gradio.Markdown("Upload an image to get an AI-generated description. Click the button to hear the description.")
+
+     with gradio.Row():
+         image_input = gradio.Image(type="pil")
+         text_output = gradio.Textbox(label="Generated Description")
+
+     generate_btn = gradio.Button("Generate Description")
+     audio_btn = gradio.Button("Click here for an audio transcript")
+     audio_output = gradio.Audio()
+
+     generate_btn.click(process_image, inputs=[image_input], outputs=[text_output])
+     audio_btn.click(get_audio, inputs=[text_output], outputs=[audio_output])
+
+ # Launch the Gradio app
+ demo.launch()
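
For reference, a small sketch of calling the two core functions above directly, without the Gradio UI. Note that demo.launch() runs at module level, so importing this module (as app.py does) also launches the Gradio demo; guarding the launch with if __name__ == "__main__": would avoid that. "photo.jpg" is a placeholder path.

from PIL import Image
from object_detection import generate_description, text_to_speech

image = Image.open("photo.jpg").convert("RGB")  # placeholder image path
caption = generate_description(image)           # BLIP caption for the image
audio_path = text_to_speech(caption)            # path to an .mp3 spoken via gTTS
print(caption, audio_path)
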
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ torch
+ transformers
+ Pillow
+ matplotlib
+ inflect
+ gTTS
+ gradio
+ requests