# --------------------------------libraries-----------------------------------
import streamlit as st
#import torch
import os
import logging
import sys
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index.embeddings import InstructorEmbedding
from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
from tqdm.notebook import tqdm
from dotenv import load_dotenv
# --------------------------------env variables-----------------------------------
# Load environment variables
load_dotenv(dotenv_path=".env")
no_proxy = os.getenv("no_proxy")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
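# Note (descriptive comment): the OpenAI-style variables above are only shown in the sidebar;
# generation in this demo is done by the local GGUF model loaded through LlamaCPP below.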
# --------------------------------cache LLM-----------------------------------
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])
# LLM
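# @st.cache_resource keeps the loaded model in memory across Streamlit reruns,
# so the GGUF weights are read from disk only once per server session.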
@st.cache_resource
def load_llm_model():
    if not os.path.exists("models"):
        st.error("The 'models' directory does not exist. Please download a GGUF model and copy it into the 'models' folder.")
        os.makedirs("models")
        return None
    llm = LlamaCPP(
        #model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf",
        model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
        temperature=0.0,
        max_new_tokens=100,
        context_window=1024,
        generate_kwargs={},
        model_kwargs={"n_gpu_layers": 20},  # offload layers to GPU when CUDA is available
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    return llm

llm = load_llm_model()
# --------------------------------cache Embedding model-----------------------------------
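# RAG setup: documents in "data/" are embedded with a local Instructor model,
# indexed into a VectorStoreIndex, and exposed as a query engine that retrieves
# relevant chunks for the LLM to answer from.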
@st.cache_resource
def load_emb_model():
    if not os.path.exists("data"):
        st.error("The 'data' directory does not exist. Please upload the data.")
        os.makedirs("data")
        return None
    embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
        #model_name="hkunlp/instructor-base"
    )
    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm)
    documents = SimpleDirectoryReader("data").load_data()
    print(f"Number of documents: {len(documents)}")
    index = VectorStoreIndex.from_documents(
        documents, service_context=service_context, show_progress=True)
    return index.as_query_engine()

query_engine = load_emb_model()
# ------------------------------------layout----------------------------------------
with st.sidebar:
    api_server_info = st.text_input("Local LLM API server", OPENAI_API_BASE, key="openai_api_base")
    st.title("🤖 Llama Index 📚")
    if st.button('Clear Memory'):
        st.session_state.memory = ""
    st.write("The local LLM API server field is not used in this demo: the local model is loaded through llama_index's llama-cpp integration.")
    st.write("🚀 This app lets you chat with a local LLM, either through an API server or loaded in cache.")
    st.subheader("💻 System Requirements:")
    st.markdown("- CPU: the faster the better")
    st.markdown("- RAM: 16 GB or higher")
    st.markdown("- GPU: optional, but very useful for CUDA acceleration")
    st.subheader("Developer Information:")
    st.write("This app is developed and maintained by **@mohcineelharras**")
# Define your app's tabs
tab1, tab2, tab3 = st.tabs(["LLM only", "LLM RAG QA with database", "One single document Q&A"])
# -----------------------------------LLM only---------------------------------------------
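# "Memory" here is a simple rolling transcript kept in st.session_state: each new
# prompt is prefixed with the previous exchange, and the result is also written to
# short_memory.txt. The sidebar's "Clear Memory" button resets it.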
if 'memory' not in st.session_state:
    st.session_state.memory = ""
#token_count = 0
with tab1:
    st.title("💬 LLM only")
    prompt = st.text_input(
        "Ask your question here",
        placeholder="Who is Lionel Messi",
    )
    template = (
        "system\n"
        "You are Dolphin, a helpful AI assistant. Your responses should be based solely on the content of documents you have access to. "
        "Do not provide information that is not contained in the documents. "
        "If a question is asked about content not in the documents, respond with 'I do not have that information.' "
        "Always respond in the same language as the question was asked. Be concise.\n"
        "user\n"
        "{prompt}\n"
        "assistant\n"
    )
    if prompt:
        contextual_prompt = st.session_state.memory + "\n" + prompt
        formatted_prompt = template.format(prompt=contextual_prompt)
        response = llm.complete(formatted_prompt, max_tokens=100, temperature=0, top_p=0.95, top_k=10)
        #print(response)
        text_response = response.text  # CompletionResponse -> plain text
        #---------------------------------------------
        # text_response = response["choices"][0]["text"]
        # token_count += response["usage"]["total_tokens"]
        # st.write("LLM's Response:\n", text_response)
        # st.write("Token count:\n", token_count)
        #---------------------------------------------
        st.write("LLM's Response:\n", text_response)
        st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n{text_response}"
        #st.write("Memory:\n", memory)
        with open("short_memory.txt", 'w') as file:
            file.write(st.session_state.memory)
# -----------------------------------LLM Q&A-------------------------------------------------
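# query_engine.query() embeds the question, retrieves the most similar chunks from the
# index, and asks the LLM to answer from them; response.source_nodes carries the
# retrieved chunks and their metadata, shown in the expander below.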
with tab2:
    st.title("💬 LLM RAG QA with database")
    st.write("To browse the files available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/blob/main/data")
    prompt = st.text_input(
        "Ask your question here",
        placeholder="How does the blockchain work?",
    )
    if prompt:
        response = query_engine.query(prompt)
        st.write("Your prompt: ", prompt)
        st.write("LLM's Response:\n" + response.response)
        with st.expander("Document Similarity Search"):
            for i, node in enumerate(response.source_nodes):
                dict_source_i = node.node.metadata
                dict_source_i.update({"Text": node.node.text})
                st.write("Source n°" + str(i + 1), dict_source_i)
                st.write()
# -----------------------------------Upload File Q&A-----------------------------------------
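# Single-document flow: the uploaded file is saved under "draft_docs/" and a fresh
# index is built for that file only (this loader is not cached, so it is rebuilt on rerun).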
def load_emb_uploaded_document(filename):
    # You may want to add a check to prevent execution during initialization.
    if 'init' in st.session_state:
        embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm)
        documents = SimpleDirectoryReader(input_files=[filename]).load_data()
        index = VectorStoreIndex.from_documents(
            documents, service_context=service_context, show_progress=True)
        return index.as_query_engine()
    return None
with tab3:
    st.title("📝 One single document Q&A with Llama Index using local open llms")
    uploaded_file = st.file_uploader("Upload a file", type=("txt", "csv", "md", "pdf"))
    question = st.text_input(
        "Ask something about the files",
        placeholder="Can you give me a short summary?",
        disabled=not uploaded_file,
    )
    if 'init' not in st.session_state:
        st.session_state.init = True
    if uploaded_file:
        if not os.path.exists("draft_docs"):
            st.error("The 'draft_docs' directory does not exist. Creating it now.")
            os.makedirs("draft_docs")
        with open("draft_docs/" + uploaded_file.name, "wb") as f:
            text = uploaded_file.read()
            f.write(text)
        # if load_emb_uploaded_document:
        #     load_emb_uploaded_document.clear()
        #load_emb_uploaded_document.clear()
        query_engine = load_emb_uploaded_document("draft_docs/" + uploaded_file.name)
        st.write("File ", uploaded_file.name, "was loaded successfully")
    if uploaded_file and question and api_server_info:
        prompt = f"""Based on the context provided, respond to the question below to the best of your ability.
        \n\n{question}"""
        response = query_engine.query(prompt)
        st.write("### Answer")
        st.write(response.response)
        with st.expander("Document Similarity Search"):
            #st.write(len(response.source_nodes))
            for i, node in enumerate(response.source_nodes):
                dict_source_i = node.node.metadata
                dict_source_i.update({"Text": node.node.text})
                st.write("Source n°" + str(i + 1), dict_source_i)
                #st.write("Source n°"+str(i))
                #st.write("Meta Data :", node.node.metadata)
                #st.write("Text :", node.node.text)
                #st.write()
    #print("Is File uploaded : ", uploaded_file==True, "Is question asked : ", question==True, "Is api server set : ", api_server_info==True)
st.markdown("""
<div style="text-align: center; margin-top: 20px;">
<a href="https://github.com/mohcineelharras/llama-index-docs" target="_blank" style="margin: 10px; display: inline-block;">
<img src="https://img.shields.io/badge/Repository-333?logo=github&style=for-the-badge" alt="Repository" style="vertical-align: middle;">
</a>
<a href="https://www.linkedin.com/in/mohcine-el-harras" target="_blank" style="margin: 10px; display: inline-block;">
<img src="https://img.shields.io/badge/-LinkedIn-0077B5?style=for-the-badge&logo=linkedin" alt="LinkedIn" style="vertical-align: middle;">
</a>
<a href="https://mohcineelharras.github.io" target="_blank" style="margin: 10px; display: inline-block;">
<img src="https://img.shields.io/badge/Visit-Portfolio-9cf?style=for-the-badge" alt="GitHub" style="vertical-align: middle;">
</a>
</div>
<div style="text-align: center; margin-top: 20px; color: #666; font-size: 0.85em;">
© 2023 Mohcine EL HARRAS
</div>
""", unsafe_allow_html=True)
# -----------------------------------end-----------------------------------------