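# Gradio app: transcribe audio with OpenAI Whisper, then translate the
# transcript into a selected language with deep-translator's GoogleTranslator.
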
import gradio as gr
import whisper
from deep_translator import GoogleTranslator
import nltk

# Tokenizer data used by nltk.word_tokenize
# (newer NLTK releases may additionally require the "punkt_tab" resource)
nltk.download('punkt')

def transcribe_audio(audio, model_name):
    """Transcribe an audio file with the given Whisper model."""
    model = whisper.load_model(model_name)
    result = model.transcribe(audio)
    return result["text"]

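# Whisper ships several checkpoint sizes ("tiny", "base", "small", "medium",
# "large"); the pipeline below uses "base" as a speed/accuracy trade-off.
# Illustrative call (hypothetical file path):
#   transcribe_audio("sample.wav", "base")
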
def translate_transcript(transcript_text, target_language, max_chunk_length=5000):
    """Translate a transcript into target_language, chunking it so each request
    stays under the translator's per-request limit (roughly 5000 characters for
    the Google Translate backend)."""
    print("Translating into", target_language)
    translator = GoogleTranslator(source='auto', target=target_language)
    # Split the transcript into chunks that break only at word boundaries
    chunks = split_text_into_chunks(transcript_text, max_chunk_length)
    translated_chunks = []
    for chunk in chunks:
        # Translate each chunk separately
        translated_chunks.append(translator.translate(chunk.strip()))
    # Join all translated chunks into a single string
    translated_text = ' '.join(translated_chunks)
    return translated_text

def split_text_into_chunks(text, max_chunk_length):
    """
    Helper function to split text into chunks no longer than max_chunk_length,
    breaking only at token boundaries so words are never cut in half.
    """
    chunks = []
    current_chunk = ""
    # Tokenize into words so chunk boundaries fall between tokens
    words = nltk.word_tokenize(text)
    for word in words:
        if len(current_chunk) + len(word) < max_chunk_length:
            current_chunk += word + " "
        else:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = word + " "
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

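# Illustrative check of the chunker (made-up input): with max_chunk_length=20,
#   split_text_into_chunks("The quick brown fox jumps over the lazy dog.", 20)
# returns ["The quick brown fox", "jumps over the lazy", "dog ."].
# Note that nltk.word_tokenize splits punctuation into separate tokens, so the
# re-joined text gains spaces before commas and periods; nltk.sent_tokenize
# would be an alternative that keeps whole sentences intact.
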
def transcribe_and_translate(audio, target_language):
    """Full pipeline used by the Gradio interface: transcribe, then translate."""
    if not target_language:
        target_language = "English"
    target_language_code = lang_name_to_code[target_language]
    # Transcribe audio with the base Whisper model
    transcript_text = transcribe_audio(audio, model_name="base")
    # Translate the transcript into the target language
    translated_text = translate_transcript(transcript_text, target_language=target_language_code)
    return translated_text

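# Standalone usage sketch (hypothetical file path; the Gradio UI passes a
# temporary filepath automatically when an upload or recording is submitted):
#   print(transcribe_and_translate("interview.mp3", "Spanish"))
# lang_name_to_code is defined further down, which is fine because Python only
# looks it up when the function is called.
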
# Top 10 widely used languages with their Google Translate codes
top_languages = [
    ("English", "en"),
    ("Chinese", "zh-CN"),  # deep-translator's Google backend expects "zh-CN" for simplified Chinese
    ("Spanish", "es"),
    ("Hindi", "hi"),
    ("Arabic", "ar"),
    ("Portuguese", "pt"),
    ("Bengali", "bn"),
    ("Russian", "ru"),
    ("Japanese", "ja"),
    ("Punjabi", "pa"),
]

lang_name_to_code = {name: code for name, code in top_languages}

# Gradio interface
demo = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Dropdown(choices=[lang[0] for lang in top_languages], label="Language"),
    ],
    outputs="textbox",
)

demo.launch()
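# When running outside Hugging Face Spaces, demo.launch(share=True) can be used
# instead to get a temporary public URL for the app.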