|
import os |
|
from dotenv import load_dotenv |
|
import gradio as gr |
|
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate |
|
from llama_index.llms.huggingface import HuggingFaceInferenceAPI |
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding |
|
from sentence_transformers import SentenceTransformer |
|
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings |
|
# Load variables from a .env file (if one exists) before HF_TOKEN is read below.
load_dotenv()

# Embedding model used by LlamaIndex for both indexing and retrieval.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Chat model reached through the Hugging Face Inference API; the token comes
# from the HF_TOKEN environment variable and is None when that is unset.
Settings.llm = HuggingFaceInferenceAPI(
    model_name="google/gemma-1.1-7b-it",
    tokenizer_name="google/gemma-1.1-7b-it",
    token=os.environ.get("HF_TOKEN"),
    context_window=3000,
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
|
|
|
|
|
# PERSIST_DIR holds the persisted index; PDF_DIRECTORY holds the source documents.
PERSIST_DIR = "db"
PDF_DIRECTORY = 'data'

# Create both directories up front (document directory first) so later reads
# and writes do not fail on a missing path.
for _required_dir in (PDF_DIRECTORY, PERSIST_DIR):
    os.makedirs(_required_dir, exist_ok=True)
|
|
|
def data_ingestion_from_directory() -> None:
    """Index the documents in PDF_DIRECTORY and persist the index to disk.

    Reads every document that SimpleDirectoryReader loads from PDF_DIRECTORY,
    builds a VectorStoreIndex from them, and writes the index's storage
    context to PERSIST_DIR so it can be reloaded later.

    Any exception raised by the reader, the index build, or the persist step
    propagates to the caller unchanged.
    """
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()

    # The index is persisted through its own storage context, so no separate
    # StorageContext needs to be created here (the original built one and
    # never used it).
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
|
|
|
def handle_query(query: str) -> str:
    """Answer a user question from the index persisted in PERSIST_DIR.

    Args:
        query: The question text to run against the index.

    Returns:
        The response text produced by the query engine, or a fixed apology
        string when the result carries no response text.
    """
    # NOTE: the index is reloaded from PERSIST_DIR on every call, so each
    # query reads whatever is currently persisted on disk.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(text_qa_template=_build_qa_template())
    return _extract_response_text(query_engine.query(query))


def _build_qa_template():
    """Return the chat prompt template that wraps the context and question."""
    chat_text_qa_msgs = [
        (
            "user",
            """
            You are now the RedfernsTech chatbot. Your aim is to provide the best user experience and give me best answer to the user only one answer. talk like professional chatbot your now my redfernstech chatbot
            and If anyone asks you personal questions, direct them to ask for details about the company.
            Context:
            {context_str}
            Question:
            {query_str}
            """
        )
    ]
    return ChatPromptTemplate.from_messages(chat_text_qa_msgs)


def _extract_response_text(answer) -> str:
    """Pull the response text out of a query result, with a fallback message."""
    # Prefer the `response` attribute; fall back to a mapping-style result.
    text = getattr(answer, 'response', None)
    if text is None and isinstance(answer, dict):
        text = answer.get('response')

    # A result whose response is None previously leaked None to the caller;
    # treat it the same as a result with no response at all.
    if text is None:
        return "Sorry, I couldn't find an answer."
    return text
|
|
|
|
|
|
|
|
|
# Rebuild the persisted index from PDF_DIRECTORY each time the script starts.
print("Processing PDF ingestion from directory:", PDF_DIRECTORY)
data_ingestion_from_directory()

# Run one sample question through the pipeline and echo both the question
# and its answer to the console.
query = "How do I use the RedfernsTech Q&A assistant?"
print("Query:", query)
response = handle_query(query)
print("Answer:", response)
|
|
|
|
|
|
|
|
|
|
|
# Single-line question box (no label, placeholder hint only) and a plain
# text box for the answer.
input_component = gr.Textbox(
    placeholder="Ask me anything about the document...",
    show_label=False,
)
output_component = gr.Textbox()

# Wire handle_query between the two text boxes.
interface = gr.Interface(
    title="RedfernsTech Q&A Chatbot",
    description="Ask me anything about the uploaded document.",
    fn=handle_query,
    inputs=input_component,
    outputs=output_component,
)

# Start the Gradio app.
interface.launch()
|
|