MarcoAland's picture
Update app.py
8440a15 verified
raw
history blame
3.84 kB
### RAG code
# Embedding model builder
import logging
import os
import re

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
def set_embed_model(
    model_name: str,
    chunk_size: int = 256,
    chunk_overlap: int = 25,
) -> None:
    """Configure the global llama_index ``Settings`` object.

    Args:
        model_name: Model identifier handed to ``HuggingFaceEmbedding``.
        chunk_size: Value stored in ``Settings.chunk_size``.
        chunk_overlap: Value stored in ``Settings.chunk_overlap``.
    """
    # No LLM is registered on the global settings; only the embedding model is.
    Settings.llm = None

    embedder = HuggingFaceEmbedding(model_name=model_name)
    Settings.embed_model = embedder

    # Chunking parameters, assigned in the same order as before.
    Settings.chunk_size, Settings.chunk_overlap = chunk_size, chunk_overlap
class RAGModule:
    """Retrieve document passages and wrap them into a prompt for the LLM.

    On construction the documents under ``docs_path`` are loaded, indexed in a
    ``VectorStoreIndex`` and exposed through a ``RetrieverQueryEngine`` whose
    results are filtered by a ``SimilarityPostprocessor``.
    """

    def __init__(self,
                 llm_model: str = "MarcoAland/llama3.1-rag-indo",
                 embedding_model: str = "MarcoAland/Indo-bge-m3",
                 docs_path: str = "data",
                 top_k: int = 3,
                 similarity_cutoff: float = 0.4):
        """Build the vector index and the query engine.

        Args:
            llm_model: Accepted for interface compatibility; not used by this
                class.
            embedding_model: Model name passed to ``set_embed_model``.
            docs_path: Directory read by ``SimpleDirectoryReader``.
            top_k: Number of nodes requested from the retriever, and the
                maximum number of passages placed in the context.
            similarity_cutoff: Cutoff passed to ``SimilarityPostprocessor``.
        """
        # Define embedding model
        set_embed_model(model_name=embedding_model)

        # Set vector DB
        documents = SimpleDirectoryReader(docs_path).load_data()
        index = VectorStoreIndex.from_documents(documents)
        retriever = VectorIndexRetriever(
            index=index,
            similarity_top_k=top_k,
        )

        self.top_k = top_k
        self.query_engine = RetrieverQueryEngine(
            retriever=retriever,
            node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)],
        )

    def format_context(self, response):
        """Render the retrieved passages as a context section.

        Args:
            response: Query-engine response exposing ``source_nodes``, each
                node having a ``text`` attribute.

        Returns:
            The context header followed by up to ``top_k`` passages, or ``""``
            when no passage was retrieved.
        """
        # The postprocessor may leave fewer than top_k nodes, so iterate over
        # what is actually there instead of indexing a fixed range.
        nodes = response.source_nodes[:self.top_k]
        if not nodes:
            return ""
        passages = "".join(node.text + "\n\n" for node in nodes)
        return "Jawab dengan akurat\n\nContext:\n" + passages

    def query(self, query: str):
        """Run retrieval for ``query`` and return the formatted context.

        Retrieval is best-effort: on any failure the error is logged and an
        empty context is returned so the caller can still build a prompt.
        """
        try:
            response = self.query_engine.query(query)
            return self.format_context(response)
        except Exception:
            logging.getLogger(__name__).exception(
                "Document retrieval failed; continuing without context"
            )
            return ""

    def prompt(self, context: str, instruction: str):
        """Combine the context and the user instruction into a single prompt."""
        return f"{context}\n ### Instruksi:\n {instruction}"

    def main(self, instruction: str):
        """Return the prompt for ``instruction`` enriched with retrieved context."""
        context = self.query(query=instruction)
        return self.prompt(context=context, instruction=instruction)
### Chainlit code
import chainlit as cl
from openai import AsyncOpenAI
# Shared RAG pipeline, built once at import time (RAGModule.__init__ loads the
# embedding model and indexes the documents directory).
RAG_Trwira = RAGModule()

# Endpoint of the OpenAI-compatible server. The defaults are the values that
# were previously hard-coded; set LLM_BASE_URL / LLM_API_KEY in the environment
# to point the app at another deployment without editing the source.
LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "http://34.69.9.203:11434/v1")
LLM_API_KEY = os.environ.get("LLM_API_KEY", "34.69.9.203")

# Configure the async OpenAI client
client = AsyncOpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)

# Keyword arguments forwarded to every chat-completion request.
settings = {
    "model": "MarcoAland/llama3.1-rag-indo",
    "temperature": 0.3,
    "max_tokens": 2048,
}
@cl.on_chat_start
async def start_chat():
    """Send the Markdown greeting when a chat session starts."""
    # Markdown text shown as a title in the UI.
    greeting = "# Hai, namaku Mitrakara👋\n\n ## Selamat datang!\n\nSiap menjadi partner dalam berkarya didunia profesional😊"
    welcome = cl.Message(content=greeting)
    await welcome.send()
# Command marker "document:" / "documents:", matched case-insensitively.
# re.ASCII keeps the case folding limited to ASCII letters.
_DOCUMENT_COMMAND = re.compile(r"documents?:", re.IGNORECASE | re.ASCII)


def _extract_document_query(content: str):
    """Return ``content`` with the first document marker removed, or None if absent."""
    match = _DOCUMENT_COMMAND.search(content)
    if match is None:
        return None
    # Cut out exactly the matched marker, wherever it is and whichever
    # spelling was used, instead of slicing a fixed number of characters.
    return (content[:match.start()] + content[match.end():]).strip()


@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming chat message, streaming the model's reply.

    Messages containing ``document:`` or ``documents:`` are first enriched
    with retrieved context through ``RAG_Trwira``; all other messages are
    sent to the model unchanged.
    """
    document_query = _extract_document_query(message.content)
    if document_query is not None:
        # Retrieval is synchronous; run it off the event loop so other chat
        # sessions are not blocked while it works.
        prompt = await cl.make_async(RAG_Trwira.main)(document_query)
    else:
        # Without documents context
        prompt = message.content

    # Format the messages as a list of message dictionaries
    chat_messages = [{"role": "user", "content": prompt}]

    # Create an initial empty message to send back to the user
    msg = cl.Message(content="")
    await msg.send()

    # Use streaming to handle partial responses
    stream = await client.chat.completions.create(messages=chat_messages, stream=True, **settings)
    async for part in stream:
        # Skip chunks that carry no choices rather than failing on the index.
        if not part.choices:
            continue
        if token := part.choices[0].delta.content or "":
            await msg.stream_token(token)

    # Update the message after streaming completion
    await msg.update()