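"""Royal Document Assistant: a Gradio app that answers questions about uploaded
PDF documents in the voice of a historical royal figure.

Pipeline: extract PDFs from a user-supplied ZIP, split them into chunks, embed
them with a sentence-transformers model, index them in FAISS, and answer queries
through a RetrievalQA chain backed by NVIDIA's hosted Llama 3.3 70B endpoint.
Answers are also read aloud with gTTS.
"""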
import tempfile
import zipfile

from gtts import gTTS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain_core.language_models.llms import LLM
from typing import Any, List, Optional
from openai import OpenAI as OpenAIClient
import gradio as gr
class LlamaLLM(LLM):
    """LangChain LLM wrapper around NVIDIA's OpenAI-compatible Llama 3.3 endpoint."""

    client: Any = None

    def __init__(self, api_key: str):
        super().__init__()
        # NVIDIA exposes an OpenAI-compatible API, so the stock OpenAI client works.
        self.client = OpenAIClient(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=api_key,
        )

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        completion = self.client.chat.completions.create(
            model="meta/llama-3.3-70b-instruct",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            top_p=0.7,
            max_tokens=1024,
        )
        return completion.choices[0].message.content

    @property
    def _llm_type(self) -> str:
        return "Llama 3.3"
def process_pdfs(zip_file, api_key):
    """Process an uploaded ZIP of PDFs and build a RetrievalQA chain plus memory."""
    print("Processing ZIP file...")
    # gr.File(type="filepath") passes a plain path string; older Gradio versions
    # pass a file-like object with a .name attribute, so handle both.
    zip_path = zip_file.name if hasattr(zip_file, "name") else zip_file
    with tempfile.TemporaryDirectory() as temp_dir:
        print(f"Extracting ZIP to temporary directory: {temp_dir}")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        print("Loading PDFs...")
        loader = DirectoryLoader(temp_dir, glob="**/*.pdf", loader_cls=PyPDFLoader)
        documents = loader.load()
        if not documents:
            raise ValueError("No PDF files found in the uploaded ZIP")
        print(f"Loaded {len(documents)} documents.")

        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_documents(documents)

        print("Creating embeddings...")
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = FAISS.from_documents(texts, embeddings)

        memory = ConversationBufferMemory()
        llm = LlamaLLM(api_key=api_key)
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(),
            memory=memory,
        )
        print("PDF processing complete.")
        return qa_chain, memory
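# Sketch of exercising the chain outside the Gradio UI (assumes a local "docs.zip"
# of PDFs and a valid key; both values are placeholders):
#
#   chain, mem = process_pdfs("docs.zip", api_key="YOUR_NVIDIA_API_KEY")
#   print(chain.invoke("Who is mentioned in these documents?")["result"])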
def generate_audio(text: str) -> Optional[str]:
    """Generate an MP3 of the text using gTTS; return its path, or None on failure."""
    try:
        tts = gTTS(text=text, lang='en')
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        temp_file.close()  # release the handle so gTTS can write to the path portably
        tts.save(temp_file.name)
        return temp_file.name
    except Exception as e:
        print(f"Audio generation error: {e}")
        return None
def chat_response(query, qa_chain, memory):
    """Run retrieval, then re-prompt the LLM to answer in a royal persona."""
    print(f"Generating response for query: {query}")
    try:
        raw_response = qa_chain.invoke(query)
        print(f"Raw response: {raw_response}")
        # RetrievalQA returns a dict; use its "result" text as context for the persona prompt.
        context = raw_response.get("result", "") if isinstance(raw_response, dict) else str(raw_response)
        royal_prompt = f"""
Respond as a historical royal figure mentioned in the query.
Use first-person perspective and be gender-specific.
Respond in the query's language. Be authoritative but polite.
Use only context information. If unsure, respond as a monarch would.

Context: {context}
Previous conversation: {memory.buffer}
Query: {query}
Royal Response:"""
        # Reach the underlying LLM through the chain and invoke it directly.
        final_response = qa_chain.combine_documents_chain.llm_chain.llm.invoke(royal_prompt)
        print(f"Final response: {final_response}")
        memory.save_context({'input': query}, {'output': final_response})
        return final_response, generate_audio(final_response)
    except Exception as e:
        print(f"Error in chat_response: {e}")
        raise gr.Error(f"Error generating response: {e}")
with gr.Blocks() as demo:
    gr.Markdown(
        """
# 👑 Royal Document Assistant
<small>This assistant helps you explore historical material in a fun and engaging way, with both text and spoken responses. But first, visit the NVIDIA LLaMA 3.3 70B page and get your API key.</small>
"""
    )

    qa_chain = gr.State()
    memory = gr.State()

    with gr.Row():
        with gr.Column():
            api_key_input = gr.Textbox(label="Enter your NVIDIA API Key", type="password")
            zip_upload = gr.File(label="Upload ZIP of PDFs", type="filepath")
            load_btn = gr.Button("Process Documents")
            load_status = gr.Markdown()

    with gr.Row(visible=False) as chat_row:
        with gr.Column():
            chat_input = gr.Textbox(label="Ask the Royal Assistant")
            chat_output = gr.Textbox(label="Response", interactive=False)
            audio_output = gr.Audio(label="Spoken Response", type="filepath")
            submit_btn = gr.Button("Ask")
    def load_docs(zip_file, api_key):
        try:
            chain, mem = process_pdfs(zip_file, api_key)
            return (
                gr.update(visible=True),
                chain,
                mem,
                "✅ Documents processed! You may now ask questions.",
            )
        except Exception as e:
            return (
                gr.update(visible=False),
                None,
                None,
                f"❌ Error processing documents: {str(e)}",
            )

    def ask_question(query, qa_chain, memory):
        if not qa_chain or not memory:
            raise gr.Error("Please process documents first!")
        try:
            response, audio = chat_response(query, qa_chain, memory)
            return response, audio
        except Exception as e:
            print(f"Error in ask_question: {e}")
            return f"Error: {str(e)}", None
    load_btn.click(
        load_docs,
        inputs=[zip_upload, api_key_input],
        outputs=[chat_row, qa_chain, memory, load_status],
    )

    submit_btn.click(
        ask_question,
        inputs=[chat_input, qa_chain, memory],
        outputs=[chat_output, audio_output],
    )

if __name__ == "__main__":
    demo.launch(share=True)
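# Likely runtime dependencies (an assumption inferred from the imports above, not a
# pinned requirements list): gradio, gtts, openai, langchain, langchain-community,
# langchain-core, sentence-transformers, faiss-cpu, pypdf.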