zanemotiwala commited on
Commit
33cb580
1 Parent(s): f009a32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -28
app.py CHANGED
@@ -2,39 +2,68 @@ import gradio as gr
2
  import logging
3
  from transformers import pipeline
4
  import torch
 
5
 
6
# App metadata for the speech-recognition UI.
# NOTE(review): `description` holds the short name and `title` the long blurb —
# these look swapped relative to Gradio's usual title/description convention;
# confirm intent (neither is actually passed to the UI below).
description = "Simple Speech Recognition App"
title = "This app allows users to record audio through the microphone or upload audio files to be transcribed into text. It uses the speech_recognition library to process audio and extract spoken words. Ideal for quick transcription of short speeches and audio notes."

# Distil-Whisper small English model; weights are downloaded on first use.
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
def transcribe_speech(audio_file_path):
    """Transcribe the audio file at `audio_file_path` with the `asr` pipeline.

    Returns the transcribed text, or a human-readable error message when no
    path is given or transcription fails (the UI displays the return value
    either way, so this function never raises).
    """
    if not audio_file_path:
        logging.error("No audio file provided.")
        return "No audio found, please retry."
    try:
        # Lazy %-formatting: the message is only built if the level is enabled.
        logging.info("Processing file: %s", audio_file_path)
        output = asr(audio_file_path)  # pipeline accepts a file path directly
        return output["text"]
    except Exception as e:
        logging.error("Error during transcription: %s", e)
        return f"Error processing the audio file: {e}"
24
 
25
# Configure the root logger once so the INFO/ERROR calls above are emitted.
logging.basicConfig(level=logging.INFO)
 
26
 
27
# Assemble the speech-recognition UI: header, audio input + button, output box.
# NOTE(review): row grouping reconstructed from the diff — indentation was lost
# in the rendered view; confirm mic and button share one row.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("# Simple Speech Recognition App")
    with gr.Row():
        gr.Markdown("### This app allows you to record or upload audio and see its transcription. Powered by the speech_recognition library.")
    with gr.Row():
        mic = gr.Audio(label="Record from Microphone or Upload File", type="filepath")
        transcribe_button = gr.Button("Transcribe Audio")
    with gr.Row():
        transcription = gr.Textbox(label="Transcription", lines=3, placeholder="Transcription will appear here...")

    transcribe_button.click(transcribe_speech, inputs=mic, outputs=transcription)

demo.launch(share=True)
 
2
import base64   # added: used by image_to_base64_str / base64_to_pil below
import io       # added: used for in-memory PNG buffers below
import json
import logging
import os       # added: endpoint URLs are read from os.environ below

import requests
import torch
from PIL import Image  # added: base64_to_pil calls Image.open but PIL was never imported
from transformers import pipeline
6
 
7
# UI copy for the caption-and-generate app.
# NOTE(review): by Gradio convention `title` is the short heading and
# `description` the longer blurb — these two look swapped; confirm intent.
description = "Image Recognition & Generation"
# Typo fix in user-facing text: "generation a caption" -> "generate a caption".
title = "This app allows users to upload an image, generate a caption of the image, then use that caption to generate a new image. Isn't it fun!"
9
 
10
# Shared helper: both the image-to-text and text-to-image endpoints go
# through this one JSON-over-HTTP call.
def get_completion(inputs, parameters=None, ENDPOINT_URL=""):
    """POST an inference request to a Hugging Face endpoint and return the decoded JSON.

    Args:
        inputs: payload for the endpoint (text prompt or base64 image string).
        parameters: optional dict of extra generation parameters.
        ENDPOINT_URL: full URL of the inference endpoint.
    """
    import os
    # BUG FIX: `hf_api_key` is not defined anywhere in this file, so every call
    # raised NameError. Prefer a module-level `hf_api_key` if one exists, else
    # fall back to the HF_API_KEY environment variable.
    api_key = globals().get("hf_api_key") or os.environ.get("HF_API_KEY", "")
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {"inputs": inputs}
    if parameters is not None:
        payload["parameters"] = parameters
    # requests.post is the direct equivalent of requests.request("POST", ...).
    response = requests.post(ENDPOINT_URL, headers=headers, data=json.dumps(payload))
    return json.loads(response.content.decode("utf-8"))
24
 
25
# Inference endpoint URLs, injected via the environment (e.g. Space secrets).
# Raises KeyError at startup if either variable is missing.
TTI_ENDPOINT = os.environ['HF_API_TTI_BASE']  # text-to-image
ITT_ENDPOINT = os.environ['HF_API_ITT_BASE']  # image-to-text
 
 
 
 
 
 
 
 
29
 
30
def image_to_base64_str(pil_image):
    """Serialize a PIL image to a base64-encoded PNG string."""
    buffer = io.BytesIO()
    pil_image.save(buffer, format='PNG')
    # b64encode returns bytes; decode to the str the JSON payload needs.
    return base64.b64encode(buffer.getvalue()).decode('utf-8')
35
+
36
def base64_to_pil(img_base64):
    """Decode a base64 string into a PIL Image."""
    raw_bytes = base64.b64decode(img_base64)
    return Image.open(io.BytesIO(raw_bytes))
41
+
42
def captioner(image):
    """Return a text caption for `image` via the image-to-text endpoint."""
    encoded = image_to_base64_str(image)
    completion = get_completion(encoded, None, ITT_ENDPOINT)
    # Response is indexed as a list of candidates; use the first one's text.
    return completion[0]['generated_text']
46
+
47
def generate(prompt):
    """Generate an image from `prompt` via the text-to-image endpoint."""
    completion = get_completion(prompt, None, TTI_ENDPOINT)
    # NOTE(review): assumes the endpoint returns a base64 image string
    # directly (not wrapped in a list/dict) — confirm against the endpoint.
    return base64_to_pil(completion)
51
+
52
def caption_and_generate(image):
    """Caption the uploaded image, then synthesize a new image from that caption.

    Returns [caption, generated_image] matching the two UI outputs.
    """
    generated_caption = captioner(image)
    generated_image = generate(generated_caption)
    return [generated_caption, generated_image]
56
 
57
# Assemble the describe-and-generate UI: image in, caption + new image out.
with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game 🖍️")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_all = gr.Button("Caption and generate")
    caption = gr.Textbox(label="Generated caption")
    image_output = gr.Image(label="Generated Image")

    # One click runs captioning, then image generation from the caption.
    btn_all.click(fn=caption_and_generate,
                  inputs=[image_upload],
                  outputs=[caption, image_output])

# Close any previously running Gradio servers before launching this one.
# NOTE(review): this call's indentation is ambiguous in the diff; it executes
# at import time either way, so placement outside the block is equivalent.
gr.close_all()

demo.launch(share=True)