import chainlit as cl
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA, ConversationChain
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.chains.conversation.memory import ConversationBufferMemory
llm = LlamaCpp(
    model_path="Model/llama-2-7b-chat.Q4_K_M.gguf",
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),  # token streaming to the terminal
    n_gpu_layers=0,  # run fully on CPU; if CUDA runs out of memory, set this to something below 33
    verbose=True,
    max_tokens=4096,
    n_ctx=3064,
    streaming=True,
    temperature=0.25,  # randomness of the reply
)
DATA_PATH = 'Data/'
DB_CHROMA_PATH = 'vectorstore/db_chroma'
template = """
You are an AI specialized in the medical domain.
Your purpose is to provide accurate, clear, and helpful responses to medical-related inquiries.
You must avoid misinformation at all costs. Do not respond to questions outside of the medical domain.
You can summarize chapters, paragraphs, and other content that is in the medical domain.
If you are unsure or lack information about a query, you must clearly state that you do not know the answer.
Question: {query}
Answer:
"""
prompt_template = PromptTemplate(input_variables=["query"], template=template)
rag_template = """
You are an AI specialized in the medical domain.
Your purpose is to provide accurate, clear, and helpful responses to medical-related inquiries.
You must avoid misinformation at all costs. Do not respond to questions outside of the medical domain.
If you are unsure or lack information about a query, you must clearly state that you do not know the answer.
Question: {query}
Answer:
"""
rag_prompt_template = PromptTemplate(input_variables=["query"], template=rag_template)
embedding_function = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'}
)
db = Chroma(persist_directory=DB_CHROMA_PATH, embedding_function=embedding_function)
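# The persisted index at DB_CHROMA_PATH is expected to already exist. The sketch below shows
# one way it could be built from documents under DATA_PATH; the loader, splitter, and chunk
# sizes are assumptions, not part of this app, and the function is never called here.
def build_vectorstore():
    from langchain_community.document_loaders import PyPDFDirectoryLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    documents = PyPDFDirectoryLoader(DATA_PATH).load()  # assumes PDF files in Data/
    chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(documents)
    return Chroma.from_documents(chunks, embedding_function, persist_directory=DB_CHROMA_PATH)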
rag_pipeline = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=db.as_retriever(),
    return_source_documents=True
)
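# Quick offline sanity check for the RAG chain (a sketch, not used by the Chainlit app;
# the example question is illustrative only):
#   out = rag_pipeline({"query": "What are the symptoms of anemia?"})
#   print(out["result"])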
# Plain conversation chain with buffer memory (not currently used by the message handlers below).
conversation_buf = ConversationChain(
    llm=llm,
    memory=ConversationBufferMemory(),
)
@cl.on_chat_start
async def on_chat_start():
    pass
@cl.step()
async def get_response(query):
    """
    Generates a response from the language model based on the user's input. If the input
    includes '-rag', it uses the retrieval-augmented generation pipeline; otherwise it
    invokes the language model directly.
    Args:
        query (str): The user's input text.
    Returns:
        str: The language model's response, including source documents if '-rag' was used.
    """
    if "-rag" in query.lower():
        response = await cl.make_async(rag_pipeline)(rag_prompt_template.format(query=query))
        result = response["result"]
        sources = response["source_documents"]
        if sources:
            source_details = "\n\nSources:"
            for source in sources:
                page_content = source.page_content
                page_number = source.metadata.get('page', 'N/A')
                source_book = source.metadata.get('source', 'N/A')
                source_details += f"\n- Page {page_number} from {source_book}: \"{page_content}\""
            result += source_details
        return result
    return await cl.make_async(llm.invoke)(prompt_template.format(query=query))
@cl.on_message
async def on_message(message: cl.Message):
    """
    Fetches the response from the language model and shows it in the web UI.
    """
    response = await get_response(message.content)
    msg = cl.Message(content=response)
    await msg.send()
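
# To launch the web UI, use Chainlit's standard entry point:
#   chainlit run app.py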