# --------------------------------libraries-----------------------------------
import streamlit as st
#import torch
import os
import logging
import sys
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index.embeddings import InstructorEmbedding
from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
from tqdm.notebook import tqdm
from dotenv import load_dotenv
# --------------------------------env variables-----------------------------------
# Load environment variables
load_dotenv(dotenv_path=".env")
no_proxy = os.getenv("no_proxy")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
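# Note (descriptive comment): the OpenAI-style variables above are only shown in the sidebar;
# generation in this demo is done by the local GGUF model loaded through LlamaCPP below.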
# --------------------------------cache LLM-----------------------------------
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])
# LLM
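# @st.cache_resource keeps the loaded model in memory across Streamlit reruns,
# so the GGUF weights are read from disk only once per server session.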
@st.cache_resource
def load_llm_model():
    if not os.path.exists("models"):
        st.error("The 'models' directory does not exist. Please download a GGUF model and copy it into the 'models' folder.")
        os.makedirs("models")
        return None
    llm = LlamaCPP(
        #model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf",
        model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
        temperature=0.0,
        max_new_tokens=100,
        context_window=1024,
        generate_kwargs={},
        model_kwargs={"n_gpu_layers": 20},  # offload layers to GPU when CUDA is available
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    return llm

llm = load_llm_model()
# --------------------------------cache Embedding model-----------------------------------
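# RAG setup: documents in "data/" are embedded with a local Instructor model,
# indexed into a VectorStoreIndex, and exposed as a query engine that retrieves
# relevant chunks for the LLM to answer from.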
@st.cache_resource
def load_emb_model():
    if not os.path.exists("data"):
        st.error("The 'data' directory does not exist. Please upload the data.")
        os.makedirs("data")
        return None
    embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
        #model_name="hkunlp/instructor-base"
    )
    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm)
    documents = SimpleDirectoryReader("data").load_data()
    print(f"Number of documents: {len(documents)}")
    index = VectorStoreIndex.from_documents(
        documents, service_context=service_context, show_progress=True)
    return index.as_query_engine()

query_engine = load_emb_model()
# ------------------------------------layout----------------------------------------
with st.sidebar:
    api_server_info = st.text_input("Local LLM API server", OPENAI_API_BASE, key="openai_api_base")
    st.title("🤖 Llama Index 📚")
    if st.button('Clear Memory'):
        st.session_state.memory = ""
    st.write("The local LLM API server field is not used in this demo: the local model is loaded through llama_index's llama-cpp integration.")
    st.write("🚀 This app lets you chat with a local LLM, either through an API server or loaded in cache.")
    st.subheader("💻 System Requirements:")
    st.markdown("- CPU: the faster the better")
    st.markdown("- RAM: 16 GB or higher")
    st.markdown("- GPU: optional, but very useful for CUDA acceleration")
    st.subheader("Developer Information:")
    st.write("This app is developed and maintained by **@mohcineelharras**")
# Define your app's tabs
tab1, tab2, tab3 = st.tabs(["LLM only", "LLM RAG QA with database", "One single document Q&A"])
# -----------------------------------LLM only---------------------------------------------
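# "Memory" here is a simple rolling transcript kept in st.session_state: each new
# prompt is prefixed with the previous exchange, and the result is also written to
# short_memory.txt. The sidebar's "Clear Memory" button resets it.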
if 'memory' not in st.session_state:
    st.session_state.memory = ""
#token_count = 0
with tab1:
    st.title("💬 LLM only")
    prompt = st.text_input(
        "Ask your question here",
        placeholder="Who is Lionel Messi",
    )
    template = (
        "system\n"
        "You are Dolphin, a helpful AI assistant. Your responses should be based solely on the content of documents you have access to. "
        "Do not provide information that is not contained in the documents. "
        "If a question is asked about content not in the documents, respond with 'I do not have that information.' "
        "Always respond in the same language as the question was asked. Be concise.\n"
        "user\n"
        "{prompt}\n"
        "assistant\n"
    )
    if prompt:
        contextual_prompt = st.session_state.memory + "\n" + prompt
        formatted_prompt = template.format(prompt=contextual_prompt)
        response = llm.complete(formatted_prompt, max_tokens=100, temperature=0, top_p=0.95, top_k=10)
        #print(response)
        text_response = response.text  # CompletionResponse -> plain text
        #---------------------------------------------
        # text_response = response["choices"][0]["text"]
        # token_count += response["usage"]["total_tokens"]
        # st.write("LLM's Response:\n", text_response)
        # st.write("Token count:\n", token_count)
        #---------------------------------------------
        st.write("LLM's Response:\n", text_response)
        st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n{text_response}"
        #st.write("Memory:\n", memory)
        with open("short_memory.txt", 'w') as file:
            file.write(st.session_state.memory)
# -----------------------------------LLM Q&A-------------------------------------------------
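# query_engine.query() embeds the question, retrieves the most similar chunks from the
# index, and asks the LLM to answer from them; response.source_nodes carries the
# retrieved chunks and their metadata, shown in the expander below.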
with tab2:
    st.title("💬 LLM RAG QA with database")
    st.write("To browse the files available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/blob/main/data")
    prompt = st.text_input(
        "Ask your question here",
        placeholder="How does the blockchain work?",
    )
    if prompt:
        response = query_engine.query(prompt)
        st.write("Your prompt: ", prompt)
        st.write("LLM's Response:\n" + response.response)
        with st.expander("Document Similarity Search"):
            for i, node in enumerate(response.source_nodes):
                dict_source_i = node.node.metadata
                dict_source_i.update({"Text": node.node.text})
                st.write("Source n°" + str(i + 1), dict_source_i)
                st.write()
# -----------------------------------Upload File Q&A-----------------------------------------
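# Single-document flow: the uploaded file is saved under "draft_docs/" and a fresh
# index is built for that file only (this loader is not cached, so it is rebuilt on rerun).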
def load_emb_uploaded_document(filename):
    # You may want to add a check to prevent execution during initialization.
    if 'init' in st.session_state:
        embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm)
        documents = SimpleDirectoryReader(input_files=[filename]).load_data()
        index = VectorStoreIndex.from_documents(
            documents, service_context=service_context, show_progress=True)
        return index.as_query_engine()
    return None
with tab3:
    st.title("📝 One single document Q&A with Llama Index using local open llms")
    uploaded_file = st.file_uploader("Upload a file", type=("txt", "csv", "md", "pdf"))
    question = st.text_input(
        "Ask something about the files",
        placeholder="Can you give me a short summary?",
        disabled=not uploaded_file,
    )
    if 'init' not in st.session_state:
        st.session_state.init = True
    if uploaded_file:
        if not os.path.exists("draft_docs"):
            st.error("The 'draft_docs' directory does not exist. Creating it now.")
            os.makedirs("draft_docs")
        with open("draft_docs/" + uploaded_file.name, "wb") as f:
            text = uploaded_file.read()
            f.write(text)
        # if load_emb_uploaded_document:
        #     load_emb_uploaded_document.clear()
        #load_emb_uploaded_document.clear()
        query_engine = load_emb_uploaded_document("draft_docs/" + uploaded_file.name)
        st.write("File ", uploaded_file.name, "was loaded successfully")
    if uploaded_file and question and api_server_info:
        prompt = f"""Based on the context provided, respond to the question below to the best of your ability.
        \n\n{question}"""
        response = query_engine.query(prompt)
        st.write("### Answer")
        st.write(response.response)
        with st.expander("Document Similarity Search"):
            #st.write(len(response.source_nodes))
            for i, node in enumerate(response.source_nodes):
                dict_source_i = node.node.metadata
                dict_source_i.update({"Text": node.node.text})
                st.write("Source n°" + str(i + 1), dict_source_i)
                #st.write("Source n°"+str(i))
                #st.write("Meta Data :", node.node.metadata)
                #st.write("Text :", node.node.text)
                #st.write()
    #print("Is File uploaded : ", uploaded_file==True, "Is question asked : ", question==True, "Is api server set : ", api_server_info==True)
st.markdown("""
<div style="text-align: center; margin-top: 20px;">
<a href="https://github.com/mohcineelharras/llama-index-docs" target="_blank" style="margin: 10px; display: inline-block;">
<img src="https://img.shields.io/badge/Repository-333?logo=github&style=for-the-badge" alt="Repository" style="vertical-align: middle;">
</a>
<a href="https://www.linkedin.com/in/mohcine-el-harras" target="_blank" style="margin: 10px; display: inline-block;">
<img src="https://img.shields.io/badge/-LinkedIn-0077B5?style=for-the-badge&logo=linkedin" alt="LinkedIn" style="vertical-align: middle;">
</a>
<a href="https://mohcineelharras.github.io" target="_blank" style="margin: 10px; display: inline-block;">
<img src="https://img.shields.io/badge/Visit-Portfolio-9cf?style=for-the-badge" alt="GitHub" style="vertical-align: middle;">
</a>
</div>
<div style="text-align: center; margin-top: 20px; color: #666; font-size: 0.85em;">
© 2023 Mohcine EL HARRAS
</div>
""", unsafe_allow_html=True)
# -----------------------------------end-----------------------------------------