# boost-space-llm / app.py
# Author: filipsedivy — "Update prompts" (commit fc6868b)
import gradio as gr
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from huggingface_hub import InferenceClient
embeddings = SentenceTransformerEmbeddings(model_name="msmarco-distilbert-base-v4")
db = Chroma(persist_directory="embeddings", embedding_function=embeddings)
client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")
def respond(
message,
history: list[tuple[str, str]],
):
messages = []
for val in history:
if val[0]:
messages.append({"role": "user", "content": val[0]})
if val[1]:
messages.append({"role": "assistant", "content": val[1]})
matching_docs = db.similarity_search(message)
if not matching_docs:
prompt = (
f"<s>[INST] You are an expert in generating responses when there is no information available. "
f"Unfortunately, there are no relevant documents available to answer the following query:\n\n"
f"Query: {message}\n\n"
f"Please provide a polite and original response to inform the user that the requested information is not "
f"available.[/INST]</s>"
)
else:
context = ""
current_length = 0
for i, doc in enumerate(matching_docs):
doc_text = f"Document {i + 1}:\n{doc.page_content}\n\n"
doc_length = len(doc_text.split())
context += doc_text
current_length += doc_length
prompt = (
f"<s>[INST] You are an expert in summarizing and answering questions based on given documents. "
f"You're an expert in English grammar at the same time. "
f"This means that your texts are flawless, correct and grammatically correct."
f"Never write in the output response what document the response is in. It looks very unprofessional."
f"Please provide a detailed and well-explained answer to the following query in 4-6 sentences:\n\n"
f"Query: {message}\n\n"
f"Based on the following documents:\n{context}\n\n"
f"Answer:[/INST]</s>"
)
messages.append({"role": "user", "content": prompt})
response = ""
for message in client.chat_completion(
messages,
max_tokens=250,
stream=True,
temperature=0.7,
top_p=0.95,
):
token = message.choices[0].delta.content
response += token
yield response
demo = gr.ChatInterface(
respond,
title="Boost.space Docs LLM",
)
if __name__ == "__main__":
demo.launch()