# Hugging Face Space: lecture-audio summarizer — transcribes an uploaded
# recording, summarizes it, and generates practice questions.
import streamlit as st
import tempfile
import soundfile as sf
from transformers import pipeline


@st.cache_resource
def _load_models():
    """Load the ASR, summarization and question-generation pipelines once.

    Streamlit re-executes the whole script on every widget interaction;
    st.cache_resource keeps the heavy model weights loaded across reruns
    instead of reloading them from disk each time.

    Returns:
        tuple: (transcriber, summarizer, question_generator) pipelines,
        all on CPU (device=-1).
    """
    return (
        pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=-1),
        pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1),
        pipeline("text2text-generation", model="google/t5-efficient-tiny", device=-1),
    )


# Load models (cached across Streamlit reruns)
transcriber, summarizer, question_generator = _load_models()

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])
if uploaded_file is not None:
    import os  # branch-local: only needed for suffix extraction and cleanup

    # Save the upload to a real file: the whisper pipeline (via ffmpeg) wants
    # a filesystem path, and keeping the original extension helps ffmpeg
    # detect the container format.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(uploaded_file.getbuffer())
        temp_audio_path = temp_audio_file.name

    try:
        # Sanity-check that the upload decodes as audio before running the
        # model; sf.read raises early with a clearer error on a bad file.
        audio_data, sample_rate = sf.read(temp_audio_path)

        # Transcribe the audio to text.
        lecture_text = transcriber(temp_audio_path)["text"]

        if not lecture_text.strip():
            st.warning("No speech detected in the uploaded audio.")
        else:
            # Rough word->token estimate (~0.75 tokens per word), capped at
            # DistilBART's 1024-token input limit. (The original compared the
            # already-scaled value against 1024 — a branch that never fired.)
            num_words = len(lecture_text.split())
            approx_tokens = int(min(num_words, 1024) * 0.75)

            # max_length bounds the *generated* summary, so keep it well below
            # the input size; truncation=True handles over-long inputs.
            summary = summarizer(
                lecture_text,
                max_length=max(min(approx_tokens, 256), 30),
                min_length=max(int(approx_tokens * 0.1), 5),
                truncation=True,
            )

            # Drop a trailing unfinished sentence so the summary ends cleanly.
            if not summary[0]["summary_text"].endswith((".", "!", "?")):
                last_period_index = summary[0]["summary_text"].rfind(".")
                if last_period_index != -1:
                    summary[0]["summary_text"] = summary[0]["summary_text"][:last_period_index + 1]

            # Generate practice questions from the summary.
            context = f"Based on the following lecture summary: {summary[0]['summary_text']}, generate some relevant practice questions."
            questions = question_generator(context, max_new_tokens=50)

            # Output
            st.write("\nSummary:\n", summary[0]["summary_text"])
            for question in questions:
                st.write(question["generated_text"])  # Output the generated questions
    except Exception as e:
        # App boundary: surface any processing failure to the user.
        st.error(f"Error during processing: {str(e)}")
    finally:
        # The temp file was created with delete=False, so it must be removed
        # explicitly — the original leaked one file per upload.
        os.unlink(temp_audio_path)