# Transcription / app.py
# ranjeetsps's picture
# Update app.py
# 8e03dad verified
import gradio as gr
import whisper
from deep_translator import GoogleTranslator
import nltk
# Download NLTK's 'punkt' tokenizer data when this module is imported
# (tokenizer data of the kind nltk.word_tokenize relies on).
nltk.download('punkt')
# Whisper models that have already been loaded, keyed by model name, so that
# repeated requests reuse the loaded model instead of reloading it each time.
_WHISPER_MODELS = {}


def transcribe_audio(audio, model_name):
    """Transcribe an audio input to text with a Whisper model.

    Args:
        audio: The audio to transcribe, passed straight to
            ``model.transcribe`` (the UI supplies a file path). ``None``
            means no audio was provided.
        model_name: Model name handed to ``whisper.load_model``.

    Returns:
        The ``"text"`` field of the transcription result, or an empty
        string when ``audio`` is ``None``.
    """
    # Nothing to transcribe: return an empty transcript rather than letting
    # the model fail on a missing input.
    if audio is None:
        return ""

    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        # First request for this model name: load it once and keep it.
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model

    result = model.transcribe(audio)
    return result["text"]
def translate_transcript(transcript_text, target_language, max_chunk_length=5000):
    """Translate a transcript into ``target_language`` one chunk at a time.

    Args:
        transcript_text: Text to translate.
        target_language: Language code passed as ``target`` to
            ``GoogleTranslator`` (the source is given as ``'auto'``).
        max_chunk_length: Size limit forwarded to
            ``split_text_into_chunks``.

    Returns:
        The translated chunks joined with single spaces.
    """
    print("Translating into", target_language)
    google = GoogleTranslator(source='auto', target=target_language)

    # Break the transcript up first; each piece is translated on its own.
    pieces = split_text_into_chunks(transcript_text, max_chunk_length)
    translated_pieces = [google.translate(piece.strip()) for piece in pieces]

    return ' '.join(translated_pieces)
def split_text_into_chunks(text, max_chunk_length):
    """Split text into whitespace-delimited chunks shorter than a limit.

    Words are packed greedily, separated by single spaces, so that every
    returned chunk has fewer than ``max_chunk_length`` characters. The text
    is split on whitespace only, so punctuation and quotes stay attached to
    their words exactly as written. A single word that is too long to fit
    in a chunk is cut into pieces that do fit.

    Args:
        text: The text to split. ``None`` or an empty string yields ``[]``.
        max_chunk_length: Exclusive upper bound on the length of each chunk.
            Values below 2 are treated as 2 (one character per chunk).

    Returns:
        A list of chunk strings, in order, with no leading or trailing
        whitespace.
    """
    if not text:
        return []

    # Largest chunk length that is still strictly below the bound.
    limit = max(max_chunk_length - 1, 1)

    chunks = []
    current_words = []
    current_len = 0  # length of " ".join(current_words)

    for word in text.split():
        # A word longer than the limit can never fit; cut it into pieces.
        for start in range(0, len(word), limit):
            piece = word[start:start + limit]
            if current_words:
                candidate_len = current_len + 1 + len(piece)
            else:
                candidate_len = len(piece)

            if current_words and candidate_len > limit:
                # Current chunk is full: emit it and start a new one.
                chunks.append(" ".join(current_words))
                current_words = [piece]
                current_len = len(piece)
            else:
                current_words.append(piece)
                current_len = candidate_len

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks
# Gradio handler: transcribe the uploaded audio, then translate the transcript
def transcribe_and_translate(audio, target_language):
    """Transcribe ``audio`` and translate the transcript.

    Args:
        audio: Audio input forwarded to ``transcribe_audio``.
        target_language: Display name of the target language, looked up in
            ``lang_name_to_code``. A falsy value selects ``"English"``.

    Returns:
        The translated transcript text.

    Raises:
        KeyError: If the language name is not in ``lang_name_to_code``.
    """
    # Resolve the language first so an unknown name fails before any
    # transcription work is done.
    chosen_language = target_language or "English"
    language_code = lang_name_to_code[chosen_language]

    # Transcription always uses the "base" model.
    transcript = transcribe_audio(audio, model_name="base")
    return translate_transcript(transcript, target_language=language_code)
# Ten widely used languages offered in the UI, as (display name, code) pairs.
# The code is what gets passed as the translation target. Chinese uses
# "zh-CN" (Simplified) because the Google backend of deep_translator lists
# Chinese as "zh-CN" / "zh-TW" and does not accept a bare "zh".
top_languages = [
    ("English", "en"),
    ("Chinese", "zh-CN"),
    ("Spanish", "es"),
    ("Hindi", "hi"),
    ("Arabic", "ar"),
    ("Portuguese", "pt"),
    ("Bengali", "bn"),
    ("Russian", "ru"),
    ("Japanese", "ja"),
    ("Punjabi", "pa"),
]

# Lookup from display name to language code.
lang_name_to_code = dict(top_languages)
# Gradio UI: an audio input (delivered to the handler as a file path) and a
# language dropdown go in; the translated transcript comes out in a textbox.
demo = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Dropdown(
            label="Language",
            choices=[name for name, _code in top_languages],
        ),
    ],
    outputs="textbox",
)

# Start the app.
demo.launch()