rexsimiloluwah committed
Commit 4ddd43e
1 Parent(s): bcb1eec

added more applications
app.py CHANGED
@@ -1,16 +1,34 @@
 import gradio as gr
 
-from tasks.asr import (
+from apps.asr import (
     mic_transcribe_interface,
     file_transcribe_interface
 )
+from apps.object_detection import obj_detection_interface
+from apps.image_captioning import img_captioning_interface
+from apps.multimodal_visual_qa import multimodal_visual_qa_interface
+from apps.ner import ner_interface
 
 app = gr.Blocks()
 
 with app:
     gr.TabbedInterface(
-        [mic_transcribe_interface, file_transcribe_interface],
-        ["Transcribe from Microphone", "Transcribe from Audio File"]
+        [
+            mic_transcribe_interface,
+            file_transcribe_interface,
+            obj_detection_interface,
+            img_captioning_interface,
+            multimodal_visual_qa_interface,
+            ner_interface
+        ],
+        [
+            "Transcribe from Microphone",
+            "Transcribe from Audio File",
+            "Detect Objects from an Image",
+            "Generate a Caption for an Image",
+            "Perform QA on an Image",
+            "Named Entity Recognition"
+        ]
     )
 
 app.launch(share=True)
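
As an aside, gr.TabbedInterface is itself a Blocks subclass in Gradio, so the enclosing gr.Blocks() context above is optional; a minimal sketch with two stand-in interfaces:

# Minimal sketch: the same tabbed layout launched directly.
# demo_a and demo_b are hypothetical stand-ins for any gr.Interface objects.
import gradio as gr

demo_a = gr.Interface(fn=lambda s: s.upper(), inputs="text", outputs="text")
demo_b = gr.Interface(fn=lambda s: s[::-1], inputs="text", outputs="text")

gr.TabbedInterface([demo_a, demo_b], ["Uppercase", "Reverse"]).launch()
# share=True would additionally create a temporary public gradio.live URL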
{tasks → apps}/__init__.py RENAMED
File without changes
apps/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (170 Bytes)
apps/__pycache__/asr.cpython-311.pyc ADDED
Binary file (1.81 kB)
{tasks → apps}/asr.py RENAMED
File without changes
apps/image_captioning.py ADDED
@@ -0,0 +1,21 @@
+import gradio as gr
+from transformers import AutoProcessor
+from transformers import BlipForConditionalGeneration
+
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+
+def caption_image(image):
+    inputs = processor(image, return_tensors="pt")
+    output = model.generate(**inputs)
+    caption = processor.decode(output[0], skip_special_tokens=True)
+
+    return caption
+
+img_captioning_interface = gr.Interface(
+    fn=caption_image,
+    inputs=gr.Image(label="Input Image", type="pil"),
+    outputs=gr.Textbox(label="Predicted Caption"),
+    title="Image Caption Generator App",
+    description="This app generates a caption for an image."
+)
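
A quick way to sanity-check caption_image outside the UI (example.jpg is a placeholder path, not part of this commit):

# Hypothetical smoke test for caption_image; example.jpg is a placeholder.
from PIL import Image

image = Image.open("example.jpg").convert("RGB")
print(caption_image(image))  # prints the BLIP-generated caption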
apps/multimodal_visual_qa.py ADDED
@@ -0,0 +1,24 @@
+import gradio as gr
+from transformers import AutoProcessor
+from transformers import BlipForQuestionAnswering
+
+model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
+processor = AutoProcessor.from_pretrained("Salesforce/blip-vqa-base")
+
+def process_image(image, question: str):
+    inputs = processor(image, question, return_tensors="pt")
+    output = model.generate(**inputs)
+    answer = processor.decode(output[0], skip_special_tokens=True)
+
+    return answer
+
+multimodal_visual_qa_interface = gr.Interface(
+    fn=process_image,
+    inputs=[
+        gr.Image(label="Input Image", type="pil"),
+        gr.Textbox(label="Enter question to prompt the image")
+    ],
+    outputs=gr.Textbox(label="Answer"),
+    title="Multimodal Visual QA Application",
+    description="This app can help you ask questions about an image"
+)
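
The same kind of check works for process_image, passing an image plus a free-form question (again, example.jpg is a placeholder):

# Hypothetical smoke test for process_image; example.jpg is a placeholder.
from PIL import Image

image = Image.open("example.jpg").convert("RGB")
print(process_image(image, "How many people are in the picture?"))
# BLIP VQA generates a short free-form answer, e.g. a number or a word.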
apps/ner.py ADDED
@@ -0,0 +1,19 @@
+import gradio as gr
+from transformers import pipeline
+
+ner_pipeline = pipeline("ner")
+
+examples = [
+    "Does Chicago have any stores and does Joe live here?",
+]
+
+def ner(text):
+    output = ner_pipeline(text)
+    return {"text": text, "entities": output}
+
+ner_interface = gr.Interface(
+    ner,
+    gr.Textbox(placeholder="Enter sentence"),
+    gr.HighlightedText(),
+    examples=examples
+)
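
One caveat, offered as a suggestion rather than part of this commit: the bare pipeline("ner") emits one entry per sub-word token, so multi-token entities render as fragments in HighlightedText. The standard transformers aggregation_strategy argument merges them:

# Sketch (not in this commit): group word pieces into whole entity spans.
from transformers import pipeline

ner_pipeline = pipeline("ner", aggregation_strategy="simple")
output = ner_pipeline("Does Chicago have any stores and does Joe live here?")
# Entries now carry "entity_group", "start" and "end" keys; recent Gradio
# versions accept the same {"text": ..., "entities": output} dict as before.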
apps/object_detection.py ADDED
@@ -0,0 +1,109 @@
+import io
+
+import requests
+import numpy as np
+import gradio as gr
+from PIL import Image
+import matplotlib.pyplot as plt
+from transformers import pipeline
+
+# Load the pipeline
+obj_detector = pipeline(
+    task="object-detection",
+    model="facebook/detr-resnet-50"
+)
+
+# Object detection utilities
+def load_image_from_url(url: str):
+    return Image.open(requests.get(url, stream=True).raw).convert("RGB")
+
+def render_results_in_image(img, detection_results):
+    plt.figure(figsize=(16, 10))
+    plt.imshow(img)
+
+    ax = plt.gca()
+
+    for prediction in detection_results:
+        x, y = prediction["box"]["xmin"], prediction["box"]["ymin"]
+        w = prediction["box"]["xmax"] - prediction["box"]["xmin"]
+        h = prediction["box"]["ymax"] - prediction["box"]["ymin"]
+
+        ax.add_patch(
+            plt.Rectangle(
+                (x, y),
+                w,
+                h,
+                fill=False,
+                color="green",
+                linewidth=2
+            )
+        )
+
+        ax.text(
+            x,
+            y,
+            f"{prediction['label']}: {round(prediction['score']*100, 1)}%"
+        )
+    plt.axis("off")
+
+    # save the modified image to a BytesIO object
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, format="png",
+                bbox_inches="tight",
+                pad_inches=0)
+    img_buf.seek(0)
+    modified_image = Image.open(img_buf)
+
+    # close the plot to prevent it from being displayed
+    plt.close()
+
+    return modified_image
+
+def summarize_detection_results(detection_results):
+    summary = {}
+
+    for prediction in detection_results:
+        label = prediction["label"]
+
+        if label in summary:
+            summary[label] += 1
+        else:
+            summary[label] = 1
+
+    summary_string = "In this image, there are "
+
+    for i, (label, count) in enumerate(summary.items()):
+        summary_string += f"{str(count)} {label}"
+
+        if count > 1:
+            summary_string += "s"
+
+        summary_string += ", "
+
+        if i == len(summary) - 2:
+            summary_string += "and "
+
+    # remove the trailing comma and space
+    summary_string = summary_string.rstrip(", ") + "."
+
+    return summary_string
+
+def detect_objects(image):
+    detection_results = obj_detector(image)
+
+    processed_image = render_results_in_image(image, detection_results)
+
+    summary_string = summarize_detection_results(detection_results)
+
+    return processed_image, summary_string
+
+obj_detection_interface = gr.Interface(
+    fn=detect_objects,
+    inputs=gr.Image(label="Input Image", type="pil"),
+    outputs=[
+        gr.Image(label="Output image with predicted objects", type="pil"),
+        gr.Textbox(label="Object detection summary")
+    ],
+    title="Object Detection Application",
+    description="This app detects objects from an image."
+)
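
detect_objects can be exercised without the UI by reusing the module's own load_image_from_url helper (the URL below is a placeholder):

# Hypothetical smoke test; substitute any reachable image URL.
image = load_image_from_url("https://example.com/street.jpg")
annotated, summary = detect_objects(image)
print(summary)                   # natural-language summary of detected objects
annotated.save("annotated.png")  # the matplotlib-rendered boxes as a PNG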