Spaces:

amanjain96
/

Agentic_RAG

Runtime error

Agentic_RAG / app.py

Aman Jain

Fixes

27eee56 7 months ago

9.92 kB

	import pandas as pd
	from transformers import AutoTokenizer
	from langchain.docstore.document import Document
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.vectorstores import FAISS
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores.utils import DistanceStrategy
	from tqdm import tqdm
	from transformers.agents import Tool, HfApiEngine, ReactJsonAgent
	from huggingface_hub import InferenceClient
	import os
	from langchain_community.document_loaders import DirectoryLoader
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_groq import ChatGroq
	from groq import Groq
	from typing import List, Dict
	from transformers.agents.llm_engine import MessageRole, get_clean_message_list
	from huggingface_hub import InferenceClient
	import streamlit as st

	# Credentials come from the environment only; API keys must never be committed
	# to source control. Configure HF_TOKEN and GROQ_API_KEY as deployment secrets.
	# NOTE(review): the Groq key that used to be hard-coded on this line remains in
	# the repository history and should be revoked.
	token = os.getenv("HF_TOKEN")
	groq_api_key = os.getenv("GROQ_API_KEY")

	# model_id="mistralai/Mistral-7B-Instruct-v0.3"
	# Load every PDF below DATA/, at any nesting depth. The earlier pattern
	# "*/.pdf" only matched files literally named ".pdf" one directory down.
	loader = DirectoryLoader("DATA", glob="**/*.pdf", show_progress=True)
	docs = loader.load()

	# Chunk sizes below are counted with the "thenlper/gte-small" tokenizer.
	tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")
	text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
	    tokenizer,
	    separators=["\n\n", "\n", ".", " ", ""],
	    chunk_size=200,
	    chunk_overlap=20,
	    add_start_index=True,
	    strip_whitespace=True,
	)

	# Split each document into chunks, keeping only the first chunk seen for any
	# given text so that exact duplicates are not indexed.
	docs_processed = []
	unique_texts = {}
	for doc in tqdm(docs):
	    for chunk in text_splitter.split_documents([doc]):
	        text = chunk.page_content
	        if text in unique_texts:
	            continue
	        unique_texts[text] = True
	        docs_processed.append(chunk)


	# Embedding model settings: run on CPU, embeddings are not normalised.
	model_name = "thenlper/gte-small"
	model_kwargs = {"device": "cpu"}
	encode_kwargs = {"normalize_embeddings": False}
	embedding_model = HuggingFaceEmbeddings(
	    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
	)

	# Build the FAISS vector database from the de-duplicated chunks
	vectordb = FAISS.from_documents(
	    docs_processed, embedding_model, distance_strategy=DistanceStrategy.COSINE
	)

	class RetrieverTool(Tool):
	    """Agent tool that returns the knowledge-base chunks closest to a query.

	    Args:
	        vectordb: Vector store exposing ``similarity_search(query, k=...)``.
	        k: Number of documents to fetch per query. Defaults to 7, the value
	            that was previously hard-coded.
	        **kwargs: Forwarded unchanged to ``Tool.__init__``.
	    """

	    name = "retriever"
	    description = "Using semantic similarity, retrieves some documents from the knowledge base that have the closest embeddings to the input query."
	    inputs = {
	        "query": {
	            "type": "string",
	            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
	        }
	    }
	    output_type = "string"

	    def __init__(self, vectordb, k=7, **kwargs):
	        super().__init__(**kwargs)
	        self.vectordb = vectordb
	        self.k = k

	    def forward(self, query: str) -> str:
	        """Return the ``k`` most similar chunks, each under a numbered header.

	        Raises:
	            TypeError: If ``query`` is not a string.
	        """
	        # Raise explicitly: an ``assert`` disappears when Python runs with -O.
	        if not isinstance(query, str):
	            raise TypeError("Your search query must be a string")

	        docs = self.vectordb.similarity_search(query, k=self.k)

	        return "\nRetrieved documents:\n" + "".join(
	            f"===== Document {i} =====\n" + doc.page_content for i, doc in enumerate(docs)
	        )


	# Retrieval tool bound to the FAISS index built above
	retriever_tool = RetrieverTool(vectordb)

	# LangChain Groq chat model (temperature 0, responses capped at 2048 tokens)
	llm = ChatGroq(model="llama3-70b-8192", temperature=0, max_tokens=2048)

	# Role mapping handed to get_clean_message_list: tool responses become user messages
	openai_role_conversions = {MessageRole.TOOL_RESPONSE: MessageRole.USER}

	class OpenAIEngine:
	    """LLM engine for the agent, backed by the Groq chat-completions client.

	    Args:
	        model_name: Identifier of the Groq-hosted model to query.
	    """

	    def __init__(self, model_name="llama-3.3-70b-versatile"):
	        # The API key is intentionally not printed: that would leak the secret into logs.
	        self.model_name = model_name
	        self.client = Groq(
	            api_key=groq_api_key,
	        )

	    def __call__(self, messages, stop_sequences=None):
	        """Send ``messages`` to the model and return the first choice's text.

	        Args:
	            messages: Agent messages; cleaned with ``get_clean_message_list``
	                using ``openai_role_conversions`` before the request.
	            stop_sequences: Optional list of stop strings. ``None`` is
	                treated as an empty list.

	        Returns:
	            The content of the first completion choice.
	        """
	        # Create the list per call instead of sharing one mutable default.
	        if stop_sequences is None:
	            stop_sequences = []

	        messages = get_clean_message_list(messages, role_conversions=openai_role_conversions)

	        response = self.client.chat.completions.create(
	            model=self.model_name,
	            messages=messages,
	            stop=stop_sequences,
	            temperature=0.5,
	            max_tokens=2048,
	        )
	        return response.choices[0].message.content

	llm_engine = OpenAIEngine()


	# ReAct JSON agent: one tool (the retriever), at most 4 iterations, verbosity 2
	agent = ReactJsonAgent(
	    tools=[retriever_tool],
	    llm_engine=llm_engine,
	    max_iterations=4,
	    verbose=2,
	)

	# Entry point for asking the agent a question
	def run_agentic_rag(question: str) -> str:
	    """Embed ``question`` in the retrieval instructions and return ``agent.run``'s result."""
	    prompt = f"""Using the information contained in your knowledge base, which you can access with the 'retriever' tool,
	give a comprehensive answer to the question below.
	Respond only to the question asked, response should be concise and relevant to the question.
	If you cannot find information, do not give up and try calling your retriever again with different arguments!
	Make sure to have covered the question completely by calling the retriever tool several times with semantically different queries.
	Your queries should not be questions but affirmative form sentences: e.g. rather than "How do I load a model from the Hub in bf16?", query should be "load a model from the Hub bf16 weights".

	Question:
	{question}"""

	    answer = agent.run(prompt)
	    return answer


	# def get_llm_hf_inference(model_id=model_id, max_new_tokens=128, temperature=0.1):
	# """
	# Returns a language model for HuggingFace inference.

	# Parameters:
	# - model_id (str): The ID of the HuggingFace model repository.
	# - max_new_tokens (int): The maximum number of new tokens to generate.
	# - temperature (float): The temperature for sampling from the model.

	# Returns:
	# - llm (HuggingFaceEndpoint): The language model for HuggingFace inference.
	# """
	# llm = HuggingFaceEndpoint(
	# repo_id=model_id,
	# max_new_tokens=max_new_tokens,
	# temperature=temperature,
	# token = os.getenv("HF_TOKEN")
	# )
	# return llm






	def get_response(chat_history, user_text):
	    """
	    Generate the assistant's reply to ``user_text`` and record the exchange.

	    The agent is run exactly once per call, so the reply returned to the
	    caller is the same text that is stored in the history.

	    Args:
	        chat_history (list): Previous chat messages as ``{'role', 'content'}``
	            dicts. The list is extended in place.
	        user_text (str): The user's input text.

	    Returns:
	        tuple: A tuple containing the generated response and the updated chat history.
	    """
	    # Record the user's turn first, then the single agent answer.
	    chat_history.append({'role': 'user', 'content': user_text})
	    response = run_agentic_rag(user_text)
	    chat_history.append({'role': 'assistant', 'content': response})
	    return response, chat_history


	st.set_page_config(page_title="Hi, I am Telto assistant", page_icon="🤗")
	st.title("Telto Support")
	st.markdown("This is telto assistant. For any guidance on how to use Telto, feel free to ask me.")

	# Seed session state; keys that are already present are left untouched.
	_session_defaults = {
	    "avatars": {'user': None, 'assistant': None},
	    'user_text': None,
	    "system_message": "friendly AI conversing with a human user",
	    "starter_message": "Hello, there! How can I help you today?",
	}
	for _key, _default in _session_defaults.items():
	    if _key not in st.session_state:
	        st.session_state[_key] = _default

	# Sidebar: avatar pickers and the history reset button
	with st.sidebar:
	    st.header("System Settings")

	    # One column per avatar selector
	    st.markdown("Select Avatars:")
	    assistant_column, user_column = st.columns(2)
	    with assistant_column:
	        st.session_state.avatars['assistant'] = st.selectbox(
	            "AI Avatar",
	            options=["🤗", "💬", "🤖"],
	            index=0,
	        )
	    with user_column:
	        st.session_state.avatars['user'] = st.selectbox(
	            "User Avatar",
	            options=["👤", "👱‍♂️", "👨🏾", "👩", "👧🏾"],
	            index=0,
	        )
	    # Holds the button's return value for this script run
	    reset_history = st.button("Reset Chat History")

	# Start a fresh history on reset, or when none exists yet
	if reset_history or "chat_history" not in st.session_state:
	    st.session_state.chat_history = [
	        {"role": "assistant", "content": st.session_state.starter_message}
	    ]
	# Chat interface


	chat_interface = st.container(border=True)
	with chat_interface:
	    # The output container is created before the input box
	    output_container = st.container()
	    st.session_state.user_text = st.chat_input(placeholder="Enter your text here.")

	# Render the conversation inside the output container
	with output_container:
	    # Replay the stored history, leaving out system messages
	    for message in st.session_state.chat_history:
	        if message['role'] != 'system':
	            # Each message is drawn with the avatar chosen for its role
	            with st.chat_message(message['role'], avatar=st.session_state['avatars'][message['role']]):
	                st.markdown(message['content'])

	    # A new message was submitted on this run
	    if st.session_state.user_text:

	        # Show the user's message right away
	        with st.chat_message("user", avatar=st.session_state.avatars['user']):
	            st.markdown(st.session_state.user_text)

	        # Show a spinner in the assistant bubble while the agent works
	        with st.chat_message("assistant", avatar=st.session_state.avatars['assistant']):
	            with st.spinner("Thinking..."):
	                # get_response returns the reply together with the updated history
	                response, st.session_state.chat_history = get_response(
	                    user_text=st.session_state.user_text,
	                    chat_history=st.session_state.chat_history,
	                )
	                st.markdown(response)