frnka committed on
Commit
fb9c6a7
1 Parent(s): 2bd5746
Files changed (2) hide show
  1. app.py +47 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import os
4
+ import deepl
5
+ import openai
6
+ from PIL import Image
7
+ import requests
8
+ from io import BytesIO
9
+
10
# Target-language code handed to the DeepL translator on every request.
TARGET_LANG = "EN-GB"

# Credentials are read from the environment; each is None when the
# corresponding variable is not set.
deepl_key = os.getenv('DEEPL')
openai.api_key = os.getenv('OPENAI')

# Speech-recognition pipeline. Replace the model id with
# "your-username/the-name-you-picked" to use your own model.
whisper = pipeline(model="openai/whisper-small")
translator = deepl.Translator(deepl_key)
17
+
18
+
19
def _download_image(url, timeout=30):
    """Download *url* and return its body opened as a PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled download cannot block the request forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with the HTTP status instead of letting PIL choke on an
    # error page that is not image data.
    response.raise_for_status()
    return Image.open(BytesIO(response.content))


def transcribe(audio):
    """Turn a speech recording into text, an English translation and an image.

    The recording is transcribed with the module-level ``whisper`` pipeline,
    the transcription is translated to ``TARGET_LANG`` with ``translator``,
    and the translation is used as the prompt for ``openai.Image.create``.
    The generated image is then downloaded from the returned URL.

    Args:
        audio: The recording to transcribe, passed straight to ``whisper``
            (the interface in this file supplies a file path).

    Returns:
        A ``(transcription, translation, image)`` tuple, where the first two
        items are strings and the last is a PIL image.

    Raises:
        ValueError: If *audio* is ``None``.
        requests.RequestException: If the generated image cannot be
            downloaded.
    """
    # NOTE(review): presumably the UI passes None when the form is submitted
    # without a recording - confirm against the Gradio Audio component.
    if audio is None:
        raise ValueError("No audio was provided; please record something first.")

    text_sv = whisper(audio)["text"]
    print(f"Audio transcribed: {text_sv}")

    text_en = translator.translate_text(text_sv, target_lang=TARGET_LANG).text
    print(f"Text translated: {text_en}")

    res = openai.Image.create(
        prompt=text_en,
        n=1,
        size="512x512",
    )
    img_url = res['data'][0]['url']
    print(f"Image generated: {img_url}")

    img = _download_image(img_url)

    return text_sv, text_en, img
35
+
36
+
37
# Input widget: a microphone recording, delivered to the callback as a
# file path.
microphone = gr.Audio(source="microphone", type="filepath")

# Output widgets, listed in the same order as the values `transcribe` returns.
result_views = [
    gr.Textbox(label="Transcribed text"),
    gr.Textbox(label="English translation"),
    gr.Image(type="pil", label="Output image"),
]

# Wire the callback to its widgets.
iface = gr.Interface(
    fn=transcribe,
    inputs=microphone,
    outputs=result_views,
    title="Swedish speech to image",
    description="You may have heard of text to image, or speach to text, but have you heard of speech to image? Now you have!",
)

# Start serving the interface.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ hopsworks
2
+ joblib
3
+ scikit-learn