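"""RAG utilities: load a PDF from a URL, build or reuse a FAISS vector store
under /tmp, and answer a list of questions with a Groq-hosted LLM through a
RetrievalQA chain."""
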
import os
import logging
import time
from fastapi import status
from langchain_groq import ChatGroq
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from app.core.template import prompt_template_description

# Shared embedding model used both to build and to load the FAISS index
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)


# Async PDF loader
async def pdf_loader(url: str):
    """Asynchronously load every page of the PDF at `url` and return them as a list."""
    pages = []
    loader = PyPDFLoader(url)
    async for page in loader.alazy_load():
        pages.append(page)
    return pages


# Main function to create/load the vector store
async def load_and_create_vector_store(url: str):
    """
    Load a PDF document from a URL and either reuse or build a FAISS vector store.
    Returns a retriever object.
    """
    # Single shared index path under /tmp: once built, the index is reused on
    # subsequent calls instead of being rebuilt.
    vectorstore_path = "/tmp/database/faiss_index"
    if os.path.exists(f"{vectorstore_path}/index.faiss"):
        logging.info("Vector store already exists, loading it.")
        vectorstore = FAISS.load_local(
            vectorstore_path, embeddings, allow_dangerous_deserialization=True
        )
    else:
        logging.info("Vector store not found. Creating new one from document.")
        pages = await pdf_loader(url)
        if not pages:
            raise ValueError("No pages loaded from the document.")

        full_text = "\n\n".join(page.page_content for page in pages)
        documents = [Document(page_content=full_text, metadata={"source": url})]

        # Split on paragraph boundaries; chunk size and overlap tuned for better chunk quality
        text_splitter = CharacterTextSplitter(
            separator="\n\n",
            chunk_size=2500,
            chunk_overlap=300,
            length_function=len,
        )
        split_docs = text_splitter.split_documents(documents)
        logging.info(f"Document split into {len(split_docs)} chunks")

        vectorstore = FAISS.from_documents(split_docs, embeddings)
        vectorstore.save_local(vectorstore_path)

    # score_threshold only takes effect with the similarity_score_threshold search type
    return vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 2, "score_threshold": 0.5},
    )


async def llm_setup(config, url):
    """
    Set up the LLM for question answering.

    Initializes the LLM with the necessary configuration for processing
    questions and generating answers based on the retrieved context.

    Args:
        config: Configuration dictionary with LLM settings
        url: URL of the document to process

    Returns:
        RetrievalQA: The configured question-answering chain.
    """
    llm = ChatGroq(
        model=config.get("MODEL_NAME"),
        temperature=float(config.get("TEMPERATURE", 0)),
        max_tokens=int(config.get("MAX_TOKENS", 300)),  # larger token limit for JSON responses
        max_retries=int(config.get("MAX_RETRIES", 3)),
        api_key=os.getenv("GROQ_KEY"),
    )
    # Do not log the API key; log only the model name.
    logging.info(f"LLM initialized with model: {config.get('MODEL_NAME')}")

    # Prompt template that shapes the structured answer output
    prompt_template = prompt_template_description()
    retriever = await load_and_create_vector_store(url=url)

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt_template},
    )
    return qa_chain


async def llm_response_generator(config, url, questions):
    """
    Generate answers from the LLM within a 30-second budget.

    Args:
        config: Configuration dictionary with LLM settings
        url: URL of the document to process
        questions: List of questions to answer

    Returns:
        Tuple of (response dict, status code)
    """
    try:
        start = time.time()
        qa_chain = await llm_setup(config, url)

        answers = []
        for question in questions:
            elapsed = time.time() - start
            if elapsed > 28:  # leave a safety margin under the 30-second budget
                logging.warning("Time limit reached, skipping remaining questions.")
                break
            try:
                answer = await qa_chain.arun(question)
                answers.append(answer)
            except Exception as e:
                logging.error(f"Error answering: {question} | {e}")
                answers.append("Error processing question.")

        return {"answers": answers}, status.HTTP_200_OK
    except Exception as e:
        logging.error(f"Error in llm_response_generator: {e}")
        return {"answers": []}, status.HTTP_500_INTERNAL_SERVER_ERROR