Commit 33cb580
Parent(s): f009a32
Update app.py

app.py CHANGED
@@ -2,39 +2,68 @@ import gradio as gr
 import logging
 from transformers import pipeline
 import torch
-
-description = "…
-title = "This app allows users to…
-
-… [old lines 9-15, including a comment and the transcribe_speech definition, are truncated in this diff view]
-        return "No audio found, please retry."
-    try:
-        logging.info(f"Processing file: {audio_file_path}")
-        output = asr(audio_file_path)  # Assuming `asr` directly takes a file path
-        return output["text"]
-    except Exception as e:
-        logging.error(f"Error during transcription: {str(e)}")
-        return f"Error processing the audio file: {str(e)}"
-
-with gr.Blocks() as demo:
-    … [old lines 28-37, defining the mic, transcription, and transcribe_button components, are truncated in this view]
-    transcribe_button.click(transcribe_speech, inputs=mic, outputs=transcription)
-
-demo.launch(share=True)
+import requests, json
+import os, io, base64  # missing from the commit as written; needed for os.environ and the base64 helpers below
+from PIL import Image  # missing from the commit as written; needed by base64_to_pil
+
+description = "Image Recognition & Generation"
+title = "This app allows users to upload an image, generate a caption for the image, then use that caption to generate a new image. Isn't it fun!"
+
+# Here we are going to call multiple endpoints!
+def get_completion(inputs, parameters=None, ENDPOINT_URL=""):
+    headers = {
+        "Authorization": f"Bearer {hf_api_key}",
+        "Content-Type": "application/json"
+    }
+    data = {"inputs": inputs}
+    if parameters is not None:
+        data.update({"parameters": parameters})
+    response = requests.request("POST",
+                                ENDPOINT_URL,
+                                headers=headers,
+                                data=json.dumps(data))
+    return json.loads(response.content.decode("utf-8"))
+
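For orientation, a minimal sketch of how this helper gets called; the URL below is a placeholder and negative_prompt is only an example parameter, neither comes from the commit:

    # Sketch only: assumes get_completion from the diff above is in scope.
    # It POSTs {"inputs": ..., "parameters": {...}} as JSON and returns the parsed response body.
    out = get_completion(
        "a lighthouse at dusk",
        parameters={"negative_prompt": "blurry"},     # merged into the body only when not None
        ENDPOINT_URL="https://example.endpoint/tti",  # placeholder URL
    )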
+# The Authorization header above needs a token; the commit never defines
+# hf_api_key, so presumably it is read from a secret like the endpoints below.
+hf_api_key = os.environ['HF_API_KEY']  # assumed variable name
+# text-to-image
+TTI_ENDPOINT = os.environ['HF_API_TTI_BASE']
+# image-to-text
+ITT_ENDPOINT = os.environ['HF_API_ITT_BASE']
+
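These os.environ reads raise KeyError unless the variables exist; on a Space they would come from repository secrets. A hypothetical local setup, run before executing app.py; names are taken from the code above plus the assumed HF_API_KEY, and all values are placeholders:

    import os
    os.environ["HF_API_KEY"] = "hf_..."                       # assumed secret name for the bearer token
    os.environ["HF_API_TTI_BASE"] = "https://<tti-endpoint>"  # text-to-image endpoint URL
    os.environ["HF_API_ITT_BASE"] = "https://<itt-endpoint>"  # image-to-text endpoint URL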
+def image_to_base64_str(pil_image):
+    byte_arr = io.BytesIO()
+    pil_image.save(byte_arr, format='PNG')
+    byte_arr = byte_arr.getvalue()
+    return str(base64.b64encode(byte_arr).decode('utf-8'))
+
+def base64_to_pil(img_base64):
+    base64_decoded = base64.b64decode(img_base64)
+    byte_stream = io.BytesIO(base64_decoded)
+    pil_image = Image.open(byte_stream)
+    return pil_image
+
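A quick way to sanity-check these two helpers is that they round-trip a PIL image:

    # Sketch: encode a tiny in-memory image to base64, then decode it back.
    from PIL import Image
    img = Image.new("RGB", (8, 8), "red")
    restored = base64_to_pil(image_to_base64_str(img))
    assert restored.size == (8, 8)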
+def captioner(image):
+    base64_image = image_to_base64_str(image)
+    result = get_completion(base64_image, None, ITT_ENDPOINT)
+    return result[0]['generated_text']
+
+def generate(prompt):
+    output = get_completion(prompt, None, TTI_ENDPOINT)
+    result_image = base64_to_pil(output)
+    return result_image
+
+def caption_and_generate(image):
+    caption = captioner(image)
+    image = generate(caption)
+    return [caption, image]
+
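For reference, the response shapes these functions assume; this is typical of the course-style custom endpoints but not verified against any particular deployment:

    # captioner() indexes result[0]['generated_text'], so it expects a list like:
    itt_response = [{"generated_text": "a dog running on grass"}]
    # generate() passes the whole response to base64_to_pil, so it expects the
    # endpoint to return the generated PNG as a single base64 string:
    tti_response = "iVBORw0KGgo..."  # truncated base64, for illustration only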
+with gr.Blocks() as demo:
+    gr.Markdown("# Describe-and-Generate game 🖍️")
+    image_upload = gr.Image(label="Your first image", type="pil")
+    btn_all = gr.Button("Caption and generate")
+    caption = gr.Textbox(label="Generated caption")
+    image_output = gr.Image(label="Generated Image")
+
+    btn_all.click(fn=caption_and_generate, inputs=[image_upload], outputs=[caption, image_output])
+
+gr.close_all()  # close any Gradio apps still running from previous executions
+demo.launch(share=True)
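Finally, a sketch of exercising the pipeline headlessly, without the Gradio UI; it assumes the environment variables are set and the endpoints are reachable, and example.jpg is a placeholder path:

    from PIL import Image
    img = Image.open("example.jpg")               # any local test image
    caption, new_img = caption_and_generate(img)  # returns [caption, image]
    print(caption)
    new_img.save("generated.png")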