Spaces:

tahirsher
/

GenAI_Lawyers_Guidance_App

Sleeping

App Files Files Community

GenAI_Lawyers_Guidance_App / app.py

tahirsher

Update app.py

4565b87 verified 14 days ago

raw

history blame contribute delete

4.53 kB

	import os
	import requests
	import streamlit as st
	from io import BytesIO
	from PyPDF2 import PdfReader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS
	from transformers import pipeline
	import torch

	# Update imports to reflect the new locations
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS

	# Set up the page configuration; this has to be the first Streamlit command
	st.set_page_config(page_title="RAG-based PDF Chat", layout="centered", page_icon="📄")


	# Load the sentence-embedding model through Streamlit's resource cache
	@st.cache_resource
	def load_embedding_function():
	    """Build the embedding model once and reuse it on later script reruns.

	    Streamlit re-executes the script on every user interaction, so an
	    uncached constructor call would load the model again each time. This
	    mirrors how the question-answering pipeline is cached in this file.

	    Returns:
	        HuggingFaceEmbeddings: Embeddings configured with the
	        ``sentence-transformers/all-MiniLM-L6-v2`` model.
	    """
	    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


	# Module-level handle read by the vector-store builder further down
	embedding_function = load_embedding_function()

	# Build the extractive question-answering model (cached by Streamlit)
	@st.cache_resource
	def load_qa_pipeline():
	    """Return the Hugging Face question-answering pipeline.

	    The pipeline uses the ``distilbert-base-cased-distilled-squad``
	    checkpoint, a text-based QA model.

	    Returns:
	        The object produced by ``transformers.pipeline`` for the
	        ``question-answering`` task.
	    """
	    return pipeline("question-answering", model="distilbert-base-cased-distilled-squad")


	# Shared pipeline instance used by the answer-generation helper
	qa_pipeline = load_qa_pipeline()

	# Generate an answer for the query from the retrieved text
	# NOTE(review): a function with the same name is defined again later in this
	# file; at import time that later definition is the one callers end up using.
	def generate_summary_with_qa_pipeline(query, retrieved_text):
	    """Answer *query* using *retrieved_text* as the context passage.

	    Args:
	        query: The user's question.
	        retrieved_text: Document text in which to look for the answer.

	    Returns:
	        str: The ``"answer"`` field of the QA pipeline result, or a fallback
	        message when there is no context text to search.
	    """
	    # The QA pipeline raises on an empty context, so reply gracefully instead.
	    if not retrieved_text or not retrieved_text.strip():
	        return "No relevant context was found in the documents."
	    # Use the pipeline to directly answer the question from the document context
	    result = qa_pipeline(question=query, context=retrieved_text)
	    return result["answer"]

	# List of Hugging Face PDF URLs.
	# These are "blob" page URLs of files in a Space; main() passes the list to
	# fetch_pdf_text_from_huggingface, which converts each entry with
	# get_huggingface_raw_url before downloading it.
	PDF_URLS = [
	"https://huggingface.co/spaces/tahirsher/GenAI_Lawyers_Guide/blob/main/administrator92ada0936848e501425591b4ad0cd417.pdf",
	"https://huggingface.co/spaces/tahirsher/GenAI_Lawyers_Guide/blob/main/Pakistan%20Penal%20Code.pdf",
	]

	# Helper function to convert Hugging Face blob URLs to direct download URLs
	def get_huggingface_raw_url(url):
	    """Convert a Hugging Face ``/blob/`` page URL into a ``/resolve/`` URL.

	    Only the first ``/blob/`` segment is rewritten, so a file path that
	    itself contains a ``blob`` directory is left intact. URLs that do not
	    contain ``huggingface.co`` or have no ``/blob/`` segment are returned
	    unchanged.

	    Args:
	        url: The URL to convert.

	    Returns:
	        str: The converted URL, or *url* itself when no conversion applies.
	    """
	    if "huggingface.co" not in url or "/blob/" not in url:
	        return url
	    # count=1: rewrite the repository marker only, never later path segments.
	    return url.replace("/blob/", "/resolve/", 1)

	# Fetch and extract text from PDF files hosted on Hugging Face
	def fetch_pdf_text_from_huggingface(urls, timeout=30):
	    """Download each PDF in *urls* and return the concatenated page text.

	    A URL that cannot be downloaded or parsed is reported with ``st.error``
	    and skipped, so one bad document does not abort the whole load.

	    Args:
	        urls: Iterable of PDF URLs. Each one is passed through
	            ``get_huggingface_raw_url`` before being requested.
	        timeout: Seconds to wait on each HTTP request before giving up.

	    Returns:
	        str: Text of every page that yielded text, in URL and page order,
	        joined without a separator. Empty when nothing could be read.
	    """
	    page_texts = []
	    for url in urls:
	        raw_url = get_huggingface_raw_url(url)
	        try:
	            # A timeout keeps an unresponsive host from hanging the app.
	            response = requests.get(raw_url, timeout=timeout)
	        except requests.RequestException as e:
	            st.error(f"Failed to fetch PDF from URL: {url} ({e})")
	            continue
	        if response.status_code != 200:
	            st.error(f"Failed to fetch PDF from URL: {url}")
	            continue
	        try:
	            pdf_reader = PdfReader(BytesIO(response.content))
	            for page in pdf_reader.pages:
	                page_text = page.extract_text()
	                if page_text:
	                    page_texts.append(page_text)
	        except Exception as e:
	            # Parsing errors vary by PDF; report and keep whatever pages
	            # were already extracted from this document.
	            st.error(f"Failed to read PDF from URL {url}: {e}")
	    return "".join(page_texts)

	# Break the document text into overlapping chunks
	@st.cache_data
	def get_text_chunks(text):
	    """Split *text* with a ``RecursiveCharacterTextSplitter``.

	    The splitter is configured with ``chunk_size=10000`` and
	    ``chunk_overlap=1000``.

	    Args:
	        text: The full document text to split.

	    Returns:
	        The chunks produced by the splitter's ``split_text`` method.
	    """
	    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
	    return splitter.split_text(text)

	# The embedding function is already initialized once near the top of this file.
	# It is intentionally not re-created here: a second constructor call would only
	# load the same sentence-transformers model again and shadow the first instance.

	# Build the FAISS index over the embedded chunks (cached by Streamlit)
	@st.cache_resource
	def load_or_create_vector_store(text_chunks):
	    """Return a FAISS vector store built from *text_chunks*.

	    The chunks are embedded with the module-level ``embedding_function``.

	    Args:
	        text_chunks: The text chunks to index.

	    Returns:
	        The store produced by ``FAISS.from_texts``.
	    """
	    return FAISS.from_texts(text_chunks, embedding=embedding_function)

	# Generate an answer for the query from the retrieved text
	# NOTE: this redefines the function of the same name declared earlier in the
	# file; being the later definition, it is the one in effect at call time.
	def generate_summary_with_qa_pipeline(query, retrieved_text):
	    """Answer *query* using *retrieved_text* as the context passage.

	    Args:
	        query: The user's question.
	        retrieved_text: Document text in which to look for the answer.

	    Returns:
	        str: The ``"answer"`` field of the QA pipeline result, or a fallback
	        message when there is no context text to search.
	    """
	    # The QA pipeline raises on an empty context, so reply gracefully instead.
	    if not retrieved_text or not retrieved_text.strip():
	        return "No relevant context was found in the documents."
	    # Use the pipeline to directly answer the question from the document context
	    result = qa_pipeline(question=query, context=retrieved_text)
	    return result["answer"]  # Directly access the answer

	# Answer a user question from the most similar document chunks
	def user_input(user_question, vector_store):
	    """Retrieve matching chunks for *user_question* and answer from them.

	    Args:
	        user_question: The question typed by the user.
	        vector_store: Store exposing ``similarity_search``; each result's
	            ``page_content`` is used as context.

	    Returns:
	        The value returned by ``generate_summary_with_qa_pipeline`` for the
	        question and the space-joined content of the retrieved chunks.
	    """
	    matches = vector_store.similarity_search(user_question)
	    context_text = " ".join(match.page_content for match in matches)
	    return generate_summary_with_qa_pipeline(user_question, context_text)

	# Main function to run the Streamlit app
	def main():
	    """Render the app: index the PDF text, then answer the user's question.

	    Stops early with an error message when no text could be extracted from
	    the configured PDFs, since there would be nothing to index or search.
	    """
	    st.title("📄 Gen AI Lawyers Guide")

	    # Load documents from Hugging Face
	    raw_text = fetch_pdf_text_from_huggingface(PDF_URLS)
	    if not raw_text.strip():
	        # With no text the chunk list is empty and building the vector store
	        # is expected to fail, so stop here with a clear message instead.
	        st.error("No text could be extracted from the configured PDF documents.")
	        return
	    text_chunks = get_text_chunks(raw_text)
	    vector_store = load_or_create_vector_store(text_chunks)

	    # User question input
	    user_question = st.text_input("Ask a Question:", placeholder="Type your question here...")

	    if st.button("Get Response"):
	        # Treat a whitespace-only question the same as an empty one.
	        if not user_question.strip():
	            st.warning("Please enter a question before submitting.")
	        else:
	            with st.spinner("Generating response..."):
	                answer = user_input(user_question, vector_store)
	            st.markdown(f"🤖 AI: {answer}")


	if __name__ == "__main__":
	    main()