Commit 33cb580
Parent(s): f009a32
Update app.py

app.py CHANGED
@@ -2,39 +2,68 @@ import gradio as gr
 import logging
 from transformers import pipeline
 import torch
-
-description = "…
-title = "This app allows users to…
-
-… [old lines 9-15, including a comment and the transcribe_speech definition, are truncated in this diff view]
-        return "No audio found, please retry."
-    try:
-        logging.info(f"Processing file: {audio_file_path}")
-        output = asr(audio_file_path)  # Assuming `asr` directly takes a file path
-        return output["text"]
-    except Exception as e:
-        logging.error(f"Error during transcription: {str(e)}")
-        return f"Error processing the audio file: {str(e)}"
-
-with gr.Blocks() as demo:
-    … [old lines 28-37, defining the mic, transcription, and transcribe_button components, are truncated in this view]
-    transcribe_button.click(transcribe_speech, inputs=mic, outputs=transcription)
-
-demo.launch(share=True)
+import requests, json
+import os, io, base64  # missing from the commit as written; needed for os.environ and the base64 helpers below
+from PIL import Image  # missing from the commit as written; needed by base64_to_pil
+
+description = "Image Recognition & Generation"
+title = "This app allows users to upload an image, generate a caption for the image, then use that caption to generate a new image. Isn't it fun!"
+
+# Here we are going to call multiple endpoints!
+def get_completion(inputs, parameters=None, ENDPOINT_URL=""):
+    headers = {
+        "Authorization": f"Bearer {hf_api_key}",
+        "Content-Type": "application/json"
+    }
+    data = {"inputs": inputs}
+    if parameters is not None:
+        data.update({"parameters": parameters})
+    response = requests.request("POST",
+                                ENDPOINT_URL,
+                                headers=headers,
+                                data=json.dumps(data))
+    return json.loads(response.content.decode("utf-8"))
+
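For orientation, a minimal sketch of how this helper gets called; the URL below is a placeholder and negative_prompt is only an example parameter, neither comes from the commit:

    # Sketch only: assumes get_completion from the diff above is in scope.
    # It POSTs {"inputs": ..., "parameters": {...}} as JSON and returns the parsed response body.
    out = get_completion(
        "a lighthouse at dusk",
        parameters={"negative_prompt": "blurry"},     # merged into the body only when not None
        ENDPOINT_URL="https://example.endpoint/tti",  # placeholder URL
    )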
+# The Authorization header above needs a token; the commit never defines
+# hf_api_key, so presumably it is read from a secret like the endpoints below.
+hf_api_key = os.environ['HF_API_KEY']  # assumed variable name
+# text-to-image
+TTI_ENDPOINT = os.environ['HF_API_TTI_BASE']
+# image-to-text
+ITT_ENDPOINT = os.environ['HF_API_ITT_BASE']
+
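These os.environ reads raise KeyError unless the variables exist; on a Space they would come from repository secrets. A hypothetical local setup, run before executing app.py; names are taken from the code above plus the assumed HF_API_KEY, and all values are placeholders:

    import os
    os.environ["HF_API_KEY"] = "hf_..."                       # assumed secret name for the bearer token
    os.environ["HF_API_TTI_BASE"] = "https://<tti-endpoint>"  # text-to-image endpoint URL
    os.environ["HF_API_ITT_BASE"] = "https://<itt-endpoint>"  # image-to-text endpoint URL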
+def image_to_base64_str(pil_image):
+    byte_arr = io.BytesIO()
+    pil_image.save(byte_arr, format='PNG')
+    byte_arr = byte_arr.getvalue()
+    return str(base64.b64encode(byte_arr).decode('utf-8'))
+
+def base64_to_pil(img_base64):
+    base64_decoded = base64.b64decode(img_base64)
+    byte_stream = io.BytesIO(base64_decoded)
+    pil_image = Image.open(byte_stream)
+    return pil_image
+
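A quick way to sanity-check these two helpers is that they round-trip a PIL image:

    # Sketch: encode a tiny in-memory image to base64, then decode it back.
    from PIL import Image
    img = Image.new("RGB", (8, 8), "red")
    restored = base64_to_pil(image_to_base64_str(img))
    assert restored.size == (8, 8)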
+def captioner(image):
+    base64_image = image_to_base64_str(image)
+    result = get_completion(base64_image, None, ITT_ENDPOINT)
+    return result[0]['generated_text']
+
+def generate(prompt):
+    output = get_completion(prompt, None, TTI_ENDPOINT)
+    result_image = base64_to_pil(output)
+    return result_image
+
+def caption_and_generate(image):
+    caption = captioner(image)
+    image = generate(caption)
+    return [caption, image]
+
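For reference, the response shapes these functions assume; this is typical of the course-style custom endpoints but not verified against any particular deployment:

    # captioner() indexes result[0]['generated_text'], so it expects a list like:
    itt_response = [{"generated_text": "a dog running on grass"}]
    # generate() passes the whole response to base64_to_pil, so it expects the
    # endpoint to return the generated PNG as a single base64 string:
    tti_response = "iVBORw0KGgo..."  # truncated base64, for illustration only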
+with gr.Blocks() as demo:
+    gr.Markdown("# Describe-and-Generate game 🖍️")
+    image_upload = gr.Image(label="Your first image", type="pil")
+    btn_all = gr.Button("Caption and generate")
+    caption = gr.Textbox(label="Generated caption")
+    image_output = gr.Image(label="Generated Image")
+
+    btn_all.click(fn=caption_and_generate, inputs=[image_upload], outputs=[caption, image_output])
+
+gr.close_all()  # close any Gradio apps still running from previous executions
+demo.launch(share=True)
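Finally, a sketch of exercising the pipeline headlessly, without the Gradio UI; it assumes the environment variables are set and the endpoints are reachable, and example.jpg is a placeholder path:

    from PIL import Image
    img = Image.open("example.jpg")               # any local test image
    caption, new_img = caption_and_generate(img)  # returns [caption, image]
    print(caption)
    new_img.save("generated.png")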