Spaces:

Khd-B
/

Study_Assistant

Sleeping

App Files Files Community

Study_Assistant / app.py

Khd-B

Update app.py

51c0f70 verified 10 months ago

raw

history blame contribute delete

2.06 kB

	import pdfplumber
	from sentence_transformers import SentenceTransformer
	import streamlit as st
	from gtts import gTTS
	import os
	from sklearn.metrics.pairwise import cosine_similarity

	# Function to extract text from a limited number of pages in a PDF
	@st.cache_resource
	def load_pdf_and_extract_text(pdf_path, max_pages=20):
	    """Return sentence-like fragments from the first pages of a PDF.

	    At most ``max_pages`` pages are read. The text of each page is split on
	    ``'. '``; whitespace inside every fragment (including the line breaks
	    produced by PDF extraction) is collapsed to single spaces, and fragments
	    that end up empty are dropped. Progress is reported through Streamlit
	    while the pages are processed.

	    Args:
	        pdf_path: Path of the PDF file, passed to ``pdfplumber.open``.
	        max_pages: Upper bound on the number of pages to read. Values of
	            zero or less read no pages.

	    Returns:
	        A list of non-empty sentence fragments in page order.
	    """
	    all_sentences = []
	    with pdfplumber.open(pdf_path) as pdf:
	        # Report and measure progress against the pages actually read, so the
	        # bar reaches 100% even when the PDF is shorter than ``max_pages``.
	        pages_to_process = min(len(pdf.pages), max(max_pages, 0))
	        st.write(f"Total pages to process: {pages_to_process}")

	        # Create one progress bar and update it; calling ``st.progress`` in
	        # the loop would add a new bar to the page for every PDF page.
	        progress_bar = st.progress(0.0)
	        for i, page in enumerate(pdf.pages[:pages_to_process]):
	            st.write(f"Processing page {i + 1}...")
	            text = page.extract_text()
	            if text:
	                fragments = (" ".join(part.split()) for part in text.split('. '))
	                all_sentences.extend(
	                    fragment for fragment in fragments if fragment
	                )
	            progress_bar.progress((i + 1) / pages_to_process)  # Update progress
	    return all_sentences

	# Load your PDF file
	pdf_path = "Accounting.pdf"  # Ensure this is uploaded to your space
	if not os.path.exists(pdf_path):
	    # Show a readable message instead of a traceback when the file is missing.
	    st.error(f"Could not find '{pdf_path}'. Upload it to the Space and reload.")
	    st.stop()
	all_sentences = load_pdf_and_extract_text(pdf_path)


	# Initialize the model
	@st.cache_resource
	def _load_model(model_name='all-MiniLM-L6-v2'):
	    """Build the SentenceTransformer once and reuse it across reruns.

	    Streamlit re-executes the whole script on every interaction; without the
	    cache the model would be constructed again each time.

	    Args:
	        model_name: Model identifier passed to ``SentenceTransformer``.

	    Returns:
	        The ``SentenceTransformer`` instance for ``model_name``.
	    """
	    return SentenceTransformer(model_name)


	model = _load_model()

	# Embed the extracted sentences; the decorated function is cached by Streamlit
	@st.cache_resource
	def create_embeddings(sentences):
	    """Encode ``sentences`` with the module-level ``model``.

	    The encoder is called with ``convert_to_tensor=True`` and its result is
	    returned unchanged.
	    """
	    sentence_vectors = model.encode(sentences, convert_to_tensor=True)
	    return sentence_vectors


	pdf_embeddings = create_embeddings(all_sentences)

	# Function to respond to user query
	def respond_to_query(query):
	    """Return the extracted sentence most similar to ``query``.

	    The query is encoded with the module-level ``model`` and compared with
	    ``pdf_embeddings`` by cosine similarity; the entry of ``all_sentences``
	    with the highest score is returned.

	    Args:
	        query: The user's question as text.

	    Returns:
	        The best-matching sentence from ``all_sentences``.

	    Raises:
	        ValueError: If ``all_sentences`` is empty, so there is nothing to
	            compare the query against.
	    """
	    if not all_sentences:
	        raise ValueError("No sentences were extracted from the PDF to search.")

	    query_embedding = model.encode(query, convert_to_tensor=True)
	    # scikit-learn converts its inputs to NumPy arrays, which fails for
	    # tensors that live on a GPU. ``.cpu()`` is a no-op for CPU tensors.
	    similarities = cosine_similarity(
	        query_embedding.cpu().reshape(1, -1),
	        pdf_embeddings.cpu(),
	    )
	    best_match_index = int(similarities.argmax())
	    return all_sentences[best_match_index]

	# Streamlit app
	def _synthesize_speech(text):
	    """Return MP3 audio bytes for ``text`` generated with gTTS.

	    The audio is written to an in-memory buffer instead of a fixed file name,
	    so concurrent sessions cannot overwrite each other's output.
	    """
	    import io  # local import: ``io`` is not among the file's top-level imports

	    audio_buffer = io.BytesIO()
	    gTTS(text).write_to_fp(audio_buffer)
	    return audio_buffer.getvalue()


	st.title("Study Assistant")

	query = st.text_input("Type your question:")
	submit_button = st.button("Ask")

	if submit_button:
	    # Treat whitespace-only input the same as an empty question.
	    if query.strip():
	        response = respond_to_query(query)

	        # Show the answer first so it stays visible even if speech fails.
	        st.write(response)

	        # Text-to-Speech (best effort: gTTS needs network access)
	        try:
	            st.audio(_synthesize_speech(response), format="audio/mp3")
	        except Exception as exc:  # UI boundary: report, do not crash the app
	            st.warning(f"Audio playback is unavailable: {exc}")
	    else:
	        st.write("Please enter a question.")