# Hugging Face Space: lecture-audio summarizer — transcribes an uploaded
# recording, summarizes it, and generates practice questions.
import streamlit as st
import tempfile
import soundfile as sf
from transformers import pipeline


@st.cache_resource
def _load_models():
    """Load the ASR, summarization and question-generation pipelines once.

    Streamlit re-executes the whole script on every widget interaction;
    st.cache_resource keeps the heavy model weights loaded across reruns
    instead of reloading them from disk each time.

    Returns:
        tuple: (transcriber, summarizer, question_generator) pipelines,
        all on CPU (device=-1).
    """
    return (
        pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=-1),
        pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1),
        pipeline("text2text-generation", model="google/t5-efficient-tiny", device=-1),
    )


# Load models (cached across Streamlit reruns)
transcriber, summarizer, question_generator = _load_models()

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])
if uploaded_file is not None:
    import os  # branch-local: only needed for suffix extraction and cleanup

    # Save the upload to a real file: the whisper pipeline (via ffmpeg) wants
    # a filesystem path, and keeping the original extension helps ffmpeg
    # detect the container format.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(uploaded_file.getbuffer())
        temp_audio_path = temp_audio_file.name

    try:
        # Sanity-check that the upload decodes as audio before running the
        # model; sf.read raises early with a clearer error on a bad file.
        audio_data, sample_rate = sf.read(temp_audio_path)

        # Transcribe the audio to text.
        lecture_text = transcriber(temp_audio_path)["text"]

        if not lecture_text.strip():
            st.warning("No speech detected in the uploaded audio.")
        else:
            # Rough word->token estimate (~0.75 tokens per word), capped at
            # DistilBART's 1024-token input limit. (The original compared the
            # already-scaled value against 1024 — a branch that never fired.)
            num_words = len(lecture_text.split())
            approx_tokens = int(min(num_words, 1024) * 0.75)

            # max_length bounds the *generated* summary, so keep it well below
            # the input size; truncation=True handles over-long inputs.
            summary = summarizer(
                lecture_text,
                max_length=max(min(approx_tokens, 256), 30),
                min_length=max(int(approx_tokens * 0.1), 5),
                truncation=True,
            )

            # Drop a trailing unfinished sentence so the summary ends cleanly.
            if not summary[0]["summary_text"].endswith((".", "!", "?")):
                last_period_index = summary[0]["summary_text"].rfind(".")
                if last_period_index != -1:
                    summary[0]["summary_text"] = summary[0]["summary_text"][:last_period_index + 1]

            # Generate practice questions from the summary.
            context = f"Based on the following lecture summary: {summary[0]['summary_text']}, generate some relevant practice questions."
            questions = question_generator(context, max_new_tokens=50)

            # Output
            st.write("\nSummary:\n", summary[0]["summary_text"])
            for question in questions:
                st.write(question["generated_text"])  # Output the generated questions
    except Exception as e:
        # App boundary: surface any processing failure to the user.
        st.error(f"Error during processing: {str(e)}")
    finally:
        # The temp file was created with delete=False, so it must be removed
        # explicitly — the original leaked one file per upload.
        os.unlink(temp_audio_path)