# GBVR / app.py
# Rabbit-Innotech — "Update app.py" (commit 768a211, verified)
import os
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from urllib.parse import urljoin, urlparse
import requests
from io import BytesIO
from langchain_chroma import Chroma
import requests
from bs4 import BeautifulSoup
from langchain_core.prompts import ChatPromptTemplate
import gradio as gr
from PyPDF2 import PdfReader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
class SessionManager:
    """In-memory, per-session chat history keyed by session id."""

    def __init__(self):
        # Maps session_id -> list of {"user": ..., "ai": ...} turns.
        self.sessions = {}

    def get_or_create_session(self, session_id):
        """Return the turn list for *session_id*, creating it on first use."""
        return self.sessions.setdefault(session_id, [])

    def add_interaction(self, session_id, user_message, ai_response):
        """Append one user/assistant exchange to the session's history."""
        self.get_or_create_session(session_id).append(
            {"user": user_message, "ai": ai_response}
        )

    def get_history(self, session_id, max_turns=5):
        """Render the last *max_turns* exchanges as a plain-text transcript."""
        recent = self.get_or_create_session(session_id)[-max_turns:]
        transcript = "".join(
            f"User: {turn['user']}\nAssistant: {turn['ai']}\n\n"
            for turn in recent
        )
        return transcript.strip()
# Initialize session manager
# Single module-level store shared by every request served by this process.
session_manager = SessionManager()

# Groq API key read from the "GBV" environment variable (None if unset).
groq_api_key= os.environ.get('GBV')

# Embedding model used both for indexing and query-time retrieval.
# NOTE(review): downloads model weights from Hugging Face at import time.
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
def scrape_websites(base_urls):
    """Crawl each base URL plus its same-domain links and return {url: text}.

    HTML pages are fetched and reduced to visible text via
    ``clean_body_content``; links ending in ``.pdf`` are downloaded and
    text-extracted with ``extract_pdf_text`` instead of being parsed as
    HTML.  Returns an empty dict on unexpected errors.

    Fixes over the previous version:
    - PDF links are no longer fetched and HTML-cleaned first (which
      stored garbled binary-as-text when PDF extraction failed).
    - URLs are marked visited even when the fetch fails, so broken
      links are not retried across base URLs.
    """
    try:
        visited_links = set()   # URLs already processed (avoid re-fetching)
        content_by_url = {}     # url -> extracted text

        for base_url in base_urls:
            if not base_url.strip():
                continue  # skip empty/whitespace-only entries

            print(f"Scraping base URL: {base_url}")
            html_content = fetch_page_content(base_url)
            visited_links.add(base_url)
            if not html_content:
                continue

            content_by_url[base_url] = clean_body_content(html_content)

            # Walk every same-domain link found on the base page.
            soup = BeautifulSoup(html_content, "html.parser")
            for link in extract_internal_links(base_url, soup):
                if link in visited_links:
                    continue
                # Mark before fetching so a failed fetch is not retried.
                visited_links.add(link)

                if link.lower().endswith('.pdf'):
                    # PDFs must not go through the HTML cleaner.
                    print(f"Extracting PDF content from: {link}")
                    pdf_content = extract_pdf_text(link)
                    if pdf_content:
                        content_by_url[link] = pdf_content
                    continue

                print(f"Scraping link: {link}")
                page_content = fetch_page_content(link)
                if page_content:
                    content_by_url[link] = clean_body_content(page_content)

        return content_by_url
    except Exception as e:
        # Best-effort crawler boundary: report and return nothing.
        print(f"Error during scraping: {e}")
        return {}
def fetch_page_content(url):
    """GET *url* (10 s timeout) and return the body as text, or None on failure."""
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Network/HTTP errors are logged and swallowed; caller checks for None.
        print(f"Error fetching {url}: {e}")
        return None
    return response.text
def extract_internal_links(base_url, soup):
    """Collect absolute URLs from <a href> tags that stay on *base_url*'s host."""
    absolute = (
        urljoin(base_url, anchor["href"])
        for anchor in soup.find_all("a", href=True)
    )
    return {url for url in absolute if is_internal_link(base_url, url)}
def is_internal_link(base_url, link_url):
    """True when both URLs share the same network location (host[:port])."""
    return urlparse(base_url).netloc == urlparse(link_url).netloc
def extract_pdf_text(pdf_url):
    """Download a PDF and return its concatenated page text, or None.

    Returns None when the download fails, the file cannot be parsed,
    or no text could be extracted.
    """
    try:
        response = requests.get(pdf_url)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching PDF {pdf_url}: {e}")
        return None
    try:
        with BytesIO(response.content) as file:
            # Pages must be read while the buffer is open.
            pdf_text = "".join(page.extract_text() for page in PdfReader(file).pages)
            return pdf_text if pdf_text else None
    except Exception as e:
        # Covers parse errors and pages with no extractable text (None join).
        print(f"Error reading PDF {pdf_url}: {e}")
        return None
def clean_body_content(html_content):
    """Strip <script>/<style> from HTML and return its visible text,
    one trimmed non-empty line per row."""
    soup = BeautifulSoup(html_content, "html.parser")
    for tag in soup(["script", "style"]):
        tag.extract()
    stripped = (line.strip() for line in soup.get_text(separator="\n").splitlines())
    return "\n".join(line for line in stripped if line)
if __name__ == "__main__":
    # Seed URL(s) crawled and indexed at startup.
    website = ["https://haguruka.org.rw/country/social-cohesion-and-reconciliation/"
    ]
    # Crawl the site(s): mapping of url -> extracted text.
    all_content = scrape_websites(website)
    # Flatten the mapping into (url, content) tuples.
    temp_list = []
    for url, content in all_content.items():
        temp_list.append((url, content))
    # Normalise each entry to a single "url: ..., content: ..." string.
    # NOTE(review): entries are always tuples here, so the str/else
    # branches below appear to be dead code — confirm before removing.
    processed_texts = []
    for element in temp_list:
        if isinstance(element, tuple):
            url, content = element
            processed_texts.append(f"url: {url}, content: {content}")
        elif isinstance(element, str):
            processed_texts.append(element)
        else:
            processed_texts.append(str(element))
    def chunk_string(s, chunk_size=1000):
        # Fixed-width character chunks, no overlap.
        return [s[i:i+chunk_size] for i in range(0, len(s), chunk_size)]
    chunked_texts = []
    for text in processed_texts:
        chunked_texts.extend(chunk_string(text))
    # Persistent Chroma collection embedded with the HF model above;
    # later module-level code (retriever) relies on this global existing.
    vectorstore = Chroma(
        collection_name="GBVR_Datst",
        embedding_function=embed_model,
        persist_directory="./",
    )
    # NOTE(review): result is discarded — this call has no effect.
    vectorstore.get().keys()
    # Index the freshly scraped chunks.
    vectorstore.add_texts(chunked_texts)
# Updated template to include conversation history
# Prompt for the RAG chain. rag_chain() fills in {context}, {question}
# and {conversation_history}; do not rename these placeholders without
# updating rag_prompt.format() below.
template = ("""
You are a friendly, intelligent, and conversational AI assistant designed to provide accurate, engaging, and human-like responses based on the given context. Your goal is to extract relevant details from the provided context: {context} and assist the user effectively. Follow these guidelines:
1. **Warm & Natural Interaction**
- If the user greets you (e.g., "Hello," "Hi," "Good morning"), respond warmly and acknowledge them.
- Example responses:
- "😊 Good morning! How can I assist you today?"
- "Hello! What can I do for you? 🚀"
2. **Precise Information Extraction**
- Provide only the relevant details from the given context: {context}.
- Do not generate extra content or assumptions beyond the provided information.
3. **Conversational & Engaging Tone**
- Keep responses friendly, natural, and engaging.
- Use occasional emojis (e.g., 😊, 🚀) to make interactions more lively.
4. **Awareness of Real-Time Context**
- If necessary, acknowledge the current date and time to show awareness of real-world updates.
5. **Handling Missing Information**
- If no relevant information exists in the context, respond politely:
- "I don't have that information at the moment, but I'm happy to help with something else! 😊"
6. **Personalized Interaction**
- Use the conversation history to provide more personalized and contextually relevant responses.
- Previous conversation history: {conversation_history}
7. **Direct, Concise Responses**
- If the user requests specific data, provide only the requested details without unnecessary explanations unless asked.
8. **Extracting Relevant Links**
- If the user asks for a link related to their request `{question}`, extract the most relevant URL from `{context}` and provide it directly.
- Example response:
- "Here is the link you requested: [URL]"
**Context:** {context}
**User's Question:** {question}
**Your Response:**
""")
# Compile the prompt template once at startup.
rag_prompt = PromptTemplate.from_template(template)
# Default similarity retriever over the Chroma index built above.
retriever = vectorstore.as_retriever()
# Groq-hosted Llama 3.3 70B chat model; api_key may be None if the
# "GBV" environment variable is unset (calls would then fail).
llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)
# Dictionary to store user sessions with session IDs
# NOTE(review): appears unused — SessionManager above holds the history.
user_sessions = {}
def rag_chain(question, session_id="default"):
    """Answer *question* via retrieval-augmented generation.

    Pulls the session's prior turns, retrieves context documents,
    formats the prompt, queries the LLM, records the exchange in the
    session manager, and returns the response text.
    """
    history = session_manager.get_history(session_id)

    # Retrieve supporting documents and merge them into one context string.
    docs = retriever.invoke(question)
    context = "\n".join(doc.page_content for doc in docs)

    prompt = rag_prompt.format(
        context=context,
        question=question,
        conversation_history=history,
    )

    answer = llm.invoke(prompt).content
    # Persist the turn so follow-up questions see it as history.
    session_manager.add_interaction(session_id, question, answer)
    return answer
def rag_memory_stream(message, history):
    """Gradio streaming handler: answer *message*, yielding it word by word.

    A crude session key is derived from the first user message in
    *history* so consecutive turns of one chat share context.
    """
    # Derive a session id from the first non-empty user turn, if any.
    # NOTE(review): assumes pair-style history entries ([user, bot]) —
    # TODO confirm against the installed Gradio version's message format.
    session_id = None
    for turn in history:
        if turn[0]:
            if session_id is None:
                session_id = hash(turn[0][:20])
            break
    if session_id is None:
        # Empty history: fall back to a shared default session.
        session_id = "default_session"

    response = rag_chain(message, str(session_id))

    # Yield progressively longer prefixes to simulate token streaming.
    streamed = ""
    for word in response.split(' '):
        streamed += word + " "
        yield streamed.strip()
# Title with emojis
# Shown as the ChatInterface heading.
title = "GBVR Chatbot"
# Custom CSS for styling the interface
# Passed verbatim to gr.ChatInterface(css=...).
custom_css = """
/* Custom CSS for styling the interface */
body {
font-family: "Arial", serif;
}
.gradio-container {
font-family: "Times New Roman", serif;
}
.gr-button {
background-color: #007bff; /* Blue button */
color: white;
border: none;
border-radius: 5px;
font-size: 16px;
padding: 10px 20px;
cursor: pointer;
}
.gr-textbox:focus, .gr-button:focus {
outline: none; /* Remove outline focus for a cleaner look */
}
/* Specific CSS for the welcome message */
.gradio-description {
font-size: 30px; /* Set font size for the welcome message */
font-family: "Arial", sans-serif;
text-align: center; /* Optional: Center-align the text */
padding: 20px; /* Optional: Add padding around the welcome message */
}
"""
def generate_welcome_message():
    """Ask the LLM for a short supportive greeting shown above the chat box."""
    welcome_prompt = """
Generate a short, simple welcome message for a chatbot about Gender-Based Violence Resources in Rwanda.
Keep it under 3 sentences, and use simple language.
Make it warm and supportive but direct and easy to read.
"""
    # One-shot LLM call; only the text content is returned.
    return llm.invoke(welcome_prompt).content
# Create simple welcome message
# NOTE(review): issues an LLM call at import time; fails hard if the
# Groq key is missing — consider a static fallback string.
welcome_msg = generate_welcome_message()
# Create the Chat Interface with welcome message
demo = gr.ChatInterface(
fn=rag_memory_stream,
title=title,
fill_height=True,
theme="soft",
css=custom_css, # Apply the custom CSS
description=welcome_msg
)
# Launch the app
if __name__ == "__main__":
    # share=True exposes a public tunnel URL; debug=True blocks and
    # prints server logs — both intended for interactive runs.
    demo.launch(share=True, inbrowser=True, debug=True)