import assemblyai as aai
from transformers import T5Tokenizer, T5ForConditionalGeneration
from deep_translator import GoogleTranslator
import spacy
import gradio as gr
from pydub import AudioSegment
import os
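# Runtime prerequisites (assumed setup, not enforced here): ffmpeg must be on
# PATH for pydub to decode video containers, sentencepiece must be installed
# for T5Tokenizer, and the spaCy English model must be installed, e.g. via
#   python -m spacy download en_core_web_sm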
# Step 1: Configure the AssemblyAI API key.
# Reading the key from an environment variable (the variable name is assumed
# here) keeps the secret out of the repository; the original hard-coded it inline.
aai.settings.api_key = os.environ["ASSEMBLYAI_API_KEY"]
transcriber = aai.Transcriber()
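# Transcriber.transcribe() accepts a local file path (or a URL), uploads the
# audio, and blocks until AssemblyAI finishes processing, so no explicit
# polling loop is needed below.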
def transcribe_audio(audio_file_path):
    """Transcribe a local audio file with AssemblyAI and return the text."""
    transcript = transcriber.transcribe(audio_file_path)
    return transcript.text
# Step 2: Language Translation (English and Urdu) with chunking
def translate_text(text, target_language):
    """Translate text with Google Translate, splitting it into chunks to stay
    under the service's per-request character limit."""
    translator = GoogleTranslator(source='auto', target=target_language)
    chunk_size = 5000  # GoogleTranslator rejects longer single requests
    translated_chunks = []
    for i in range(0, len(text), chunk_size):
        chunk = text[i:i + chunk_size]
        translated_chunks.append(translator.translate(chunk))
    return " ".join(translated_chunks)
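# Example (illustrative): translate_text("The meeting is on Monday.", "ur")
# returns the Urdu rendering; transcripts longer than 5,000 characters are
# translated chunk by chunk and re-joined with spaces.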
# Step 3: Summarization with T5 Model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model_t5 = T5ForConditionalGeneration.from_pretrained('t5-base')
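# t5-base (~220M parameters) is small enough for CPU inference on a free
# Space. T5 is a text-to-text model, so the "summarize: " prefix below is
# what selects its summarization behavior.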
def summarize_text(text, source_language, target_language):
    """Summarize with T5. T5 is an English-language model, so Urdu input is
    translated to English first and the summary is translated back."""
    if source_language == 'urdu':
        text = translate_text(text, 'en')  # translate to English for summarization
    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt",
                              max_length=512, truncation=True)
    summary_ids = model_t5.generate(inputs, max_length=150, min_length=30,
                                    length_penalty=2.0, num_beams=4,
                                    early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    if source_language == 'urdu':
        summary = translate_text(summary, target_language)  # translate back to Urdu
    return summary
# Step 4: Key Points Extraction with spaCy
nlp = spacy.load("en_core_web_sm")
def extract_key_points(text):
    """Pull out entities that typically mark action items: dates, people, and
    organizations. (en_core_web_sm has no "TASK" entity label, so the original
    check for it could never match and is dropped here.)"""
    doc = nlp(text)
    return [ent.text for ent in doc.ents if ent.label_ in ("DATE", "PERSON", "ORG")]
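# Example (illustrative): on "Sana will send the report to ACME by Friday",
# the DATE/PERSON/ORG filter would typically surface mentions such as the
# person, the organization, and the deadline.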
# Ensure the output directory exists. ("/content/" is Colab-specific and is
# generally not writable on other hosts; a relative directory works everywhere.)
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)
# Step 5: Gradio Interface Setup
def process_meeting(file, language):
    file_path = file.name
    audio_path = os.path.join(output_dir, "uploaded_audio.wav")
    # Convert video to audio if necessary (pydub delegates the decoding
    # of these container formats to ffmpeg).
    if file_path.endswith(('.mp4', '.avi', '.mov', '.mkv')):
        audio = AudioSegment.from_file(file_path)
        audio.export(audio_path, format="wav")
    else:
        audio_path = file_path
    # Step 1: Transcription
    transcription = transcribe_audio(audio_path)
    # Step 2: Translation based on the user-selected language
    if language == "urdu":
        translated_text = translate_text(transcription, 'ur')
    else:  # default to English
        translated_text = transcription
    # Step 3: Summarization and key-points extraction. The spaCy model is
    # English-only, so key points are extracted from the English transcription
    # rather than from the (possibly Urdu) translation.
    summary = summarize_text(translated_text, language, 'ur')
    key_points = extract_key_points(transcription)
    return transcription, translated_text, "\n".join(key_points), summary
# Step 6: Launch Gradio Interface
iface = gr.Interface(
    fn=process_meeting,
    inputs=[
        gr.File(label="Upload Meeting Recording"),
        gr.Radio(["english", "urdu"], label="Select Summary Language"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Translated Text"),
        gr.Textbox(label="Key Points"),
        gr.Textbox(label="Summary"),
    ],
    title="Smart AI Meeting Assistant",
    description="""
    <div style='text-align: center;'>by Ayesha Ameen & Sana Sadiq</div>
    <br>Upload your meeting recording and choose the summary language (English or Urdu).
    """,
)
iface.launch(share=True, debug=True)
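# Note: share=True only matters when running locally (it creates a temporary
# public gradio.live link); on Hugging Face Spaces the app is already served
# publicly and Gradio ignores the flag.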