abdullahedcults committed on
Commit
3dd86d3
1 Parent(s): 6557988

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -8
app.py CHANGED
@@ -1,12 +1,63 @@
1
  import streamlit as st
2
  from audio_recorder_streamlit import audio_recorder
 
 
 
3
 
4
- audio_bytes = audio_recorder()
5
- print(audio_bytes)
6
- if audio_bytes:
7
- st.audio(audio_bytes, format="audio/wav")
8
 
9
- # To save audio to a file:
10
- wav_file = open("audio.mp3", "wb")
11
- wav_file.write(audio_bytes)
12
- print('Output dump is there')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from audio_recorder_streamlit import audio_recorder
3
+ import time
4
+ import re
5
+ import os
6
 
7
import whisper
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Streamlit re-executes this script on every widget interaction, so the
# models are built by a cached factory rather than at bare module level.
# If this Streamlit release has no ``cache_resource`` attribute, fall back to
# an identity decorator, which keeps the original load-on-every-run behavior.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_models():
    """Load the Whisper model and the English-to-Hindi tokenizer/model pair.

    Returns:
        A ``(whisper_model, tokenizer, translation_model)`` tuple.
    """
    whisper_model = whisper.load_model('medium')
    # loading the tokenizer and the model
    hindi_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-hi")
    hindi_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-hi")
    return whisper_model, hindi_tokenizer, hindi_model


# Module-level names are unchanged so the functions below keep working.
model, tokenizer, model_hindi = _load_models()
15
+
16
def translator(text):
    """Translate English text to Hindi.

    Uses the module-level ``tokenizer`` and ``model_hindi``.

    Args:
        text: English text to translate.

    Returns:
        The decoded translation with special tokens stripped, or ``""``
        when ``text`` is empty or contains only whitespace.
    """
    # An empty prompt carries nothing to translate, so skip the model call
    # instead of decoding whatever tokens generate() would emit for it.
    if not text or not text.strip():
        return ""

    input_ids = tokenizer.encode(text, return_tensors="pt", padding=True)
    outputs = model_hindi.generate(input_ids)
    # generate() returns a batch; a single input yields a single sequence.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
22
+
23
def split_sentences(generated_text):
    """Split text into sentence fragments on ``.``, ``!`` or ``?``.

    The terminator itself is dropped. A terminator directly preceded by a
    comma is not treated as a boundary. Each fragment is stripped of
    surrounding whitespace, and fragments that end up empty (for example the
    one after the final terminator) are discarded.

    Args:
        generated_text: The text to split.

    Returns:
        A list of non-empty, stripped fragments in their original order.
    """
    fragments = re.split(r'(?<!,)[.!?]', generated_text)
    stripped = (fragment.strip() for fragment in fragments)
    return [sentence for sentence in stripped if sentence]
27
+
28
def transcribe(audio):
    """Transcribe audio with the Whisper model and translate it to Hindi.

    Args:
        audio: Either raw encoded audio bytes (what ``main`` passes from the
            recorder widget) or any input ``model.transcribe`` accepts
            directly, which is forwarded unchanged.

    Returns:
        A ``(transcript, hindi_text)`` tuple. ``hindi_text`` is the
        sentence-by-sentence translation joined with single spaces.
    """
    import os
    import tempfile

    if isinstance(audio, (bytes, bytearray)):
        # Whisper's transcribe() takes a file path or decoded samples, not
        # raw encoded bytes, so spill the recording to a temporary file.
        # delete=False lets the file be reopened by name on every platform;
        # it is removed explicitly once transcription finishes.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(audio)
            tmp_path = tmp.name
        try:
            result = model.transcribe(tmp_path)
        finally:
            os.remove(tmp_path)
    else:
        result = model.transcribe(audio)

    generated_text = result["text"]

    # Translate sentence by sentence, skipping empty fragments left over
    # from splitting so they are not sent through the translation model.
    translated = [
        translator(sentence)
        for sentence in split_sentences(generated_text)
        if sentence
    ]
    text_hindi = " ".join(translated)

    return generated_text, text_hindi
44
+
45
def main():
    """Render the page: record audio, then show transcript and translation."""
    st.title("Translate and Transcribe Audio")

    intro_lines = (
        "Click the 'Start Recording' button to start recording your voice. Press 'Stop Recording' when done.",
        "The transcribed text will be displayed below.",
    )
    for line in intro_lines:
        st.write(line)

    recording = audio_recorder()
    # Nothing has been recorded yet on this run, so there is nothing to show.
    if not recording:
        return

    with st.spinner("Transcribing audio... Please wait."):
        english, hindi = transcribe(recording)

    sections = (
        ("Original Text (English):", english),
        ("Translated Text (Hindi):", hindi),
    )
    for heading, body in sections:
        st.subheader(heading)
        st.write(body)


if __name__ == "__main__":
    main()