StephaneBah committed on
Commit dc7109c
1 Parent(s): 3b6ee3b
Files changed (1)
  1. app.py +47 -2
app.py CHANGED
@@ -1,4 +1,49 @@
  import streamlit as st

- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+ from diffusers import DiffusionPipeline
+ import torch
+ import accelerate
+
+ # Load the models and tokenizers
+ translation_model_name = "google/madlad400-3b-mt"
+ translation_model = AutoModelForSeq2SeqLM.from_pretrained(translation_model_name)
+ translation_tokenizer = AutoTokenizer.from_pretrained(translation_model_name)
+
+ transcription_model = "chrisjay/fonxlsr"
+
+ diffusion_model_name = "stabilityai/stable-diffusion-xl-base-1.0"
+ diffusion_pipeline = DiffusionPipeline.from_pretrained(diffusion_model_name, torch_dtype=torch.float16)
+ diffusion_pipeline = diffusion_pipeline.to("cuda")
+
+ # Define the translation and transcription pipelines with accelerate
+ translation_pipeline = pipeline("translation", model=translation_model, tokenizer=translation_tokenizer, device_map="auto")
+ transcription_pipeline = pipeline("automatic-speech-recognition", model=transcription_model, device_map="auto")
+
+ # Define the function for transcribing and translating audio in Fon
+ def transcribe_and_translate_audio_fon(audio_input, num_images=1):
+     # Transcribe the audio (a file path or raw bytes) to Fon text using the transcription pipeline
+     transcription_fon = transcription_pipeline(audio_input)["text"]
+
+     # Translate the Fon transcription to French; MADLAD-400 selects the target language via a <2xx> prefix token
+     translation_result = translation_pipeline("<2fr> " + transcription_fon)
+     translation_fr = translation_result[0]["translation_text"]
+
+     images = diffusion_pipeline(translation_fr, num_images_per_prompt=num_images)["images"]
+
+     return images
+
+ # Create a Streamlit app
+ st.title("Fon Audio to Image Translation")
+
+ # Upload audio file
+ audio_file = st.file_uploader("Upload an audio file", type=["wav"])
+
+ # Transcribe, translate and generate images
+ if audio_file:
+     images = transcribe_and_translate_audio_fon(audio_file.read())
+     st.image(images[0])
+
+
+ # Use Accelerate to distribute the computation across available GPUs
+ #images = accelerate.launch(transcribe_and_translate_and_generate, audio_file="Fongbe_Speech_Dataset/Fongbe_Speech_Dataset/fongbe_speech_audio_files/wav/64_fongbe_6b36d45b77344caeb1c8d773303c9dcb_for_validation_2022-03-11-23-50-13.wav", num_images=2)
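
Note: the translation step above relies on the prompt convention documented on the google/madlad400-3b-mt model card, where the target language is chosen by prepending a <2xx> token to the input text rather than by passing language arguments to the pipeline. A minimal standalone sketch of that convention, independent of the Streamlit app (the English example sentence and generation settings are illustrative only):

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/madlad400-3b-mt")
model = AutoModelForSeq2SeqLM.from_pretrained("google/madlad400-3b-mt")

# The "<2fr>" prefix asks the model to translate the following text into French
inputs = tokenizer("<2fr> I love pizza!", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))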