import os

import gradio as gr
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
    Settings,
)
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import BitsAndBytesConfig

# load_dotenv() exports HF_TOKEN into the environment, where huggingface_hub
# picks it up for gated/private model downloads.
load_dotenv()
hf_token = os.getenv("HF_TOKEN")

PERSIST_DIR = "./storage"  # where the vector index is persisted between runs
CORPUS_DIR = "articles"    # local directory of source documents

# Quantize the LLM to 4-bit so it fits on a single consumer GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # bnb_4bit_compute_dtype=torch.float16,  # requires `import torch` if enabled
    # bnb_4bit_quant_type="nf4",
    # bnb_4bit_use_double_quant=True,
)

# Global LlamaIndex settings: the embedding model and LLM used by every
# index and query engine created below.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
Settings.llm = HuggingFaceLLM(
    model_name="microsoft/Phi-3-small-8k-instruct",
    tokenizer_name="microsoft/Phi-3-small-8k-instruct",
    context_window=3900,
    max_new_tokens=500,
    model_kwargs={"quantization_config": quantization_config},
    # do_sample=True is required for temperature/top_k/top_p to take effect;
    # without it, transformers uses greedy decoding and ignores them.
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
    # messages_to_prompt=messages_to_prompt,
    # completion_to_prompt=completion_to_prompt,
    device_map="auto",
)

if not os.path.exists(PERSIST_DIR):
    # First run: load the documents, build the index, and persist it.
    documents = SimpleDirectoryReader(CORPUS_DIR).load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Subsequent runs: reload the persisted index instead of re-embedding.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

query_engine = index.as_query_engine()


# Console fallback, kept for debugging without the web UI:
# def chat():
#     print("Chatbot is ready. Type 'exit' to end the conversation.")
#     while True:
#         user_input = input("You: ")
#         if user_input.lower() == "exit":
#             print("Ending the chat. Goodbye!")
#             break
#         response = query_engine.query(user_input)
#         print(f"Chatbot: {response}")


def chatbot_response(message, history):
    """Gradio ChatInterface callback: answer each message via the query engine."""
    response = query_engine.query(message)
    return str(response)


iface = gr.ChatInterface(
    fn=chatbot_response,
    title="UESP Lore Chatbot",
    description="Ask questions about The Elder Scrolls lore!",
    examples=["Who is Vivec?", "Tell me about the Oblivion Crisis", "Who is King Edward?"],
    cache_examples=True,
)

# Launch the interface
if __name__ == "__main__":
    # chat()  # console mode
    iface.launch()
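

# The messages_to_prompt / completion_to_prompt hooks on HuggingFaceLLM are left
# commented out above because the helper functions were never defined. Below is a
# minimal sketch, assuming Phi-3's instruct chat format
# (<|user|> ... <|end|> <|assistant|>); verify the special tokens against the model
# card, then move these above the Settings.llm block and uncomment the two keyword
# arguments to enable them.
#
# def messages_to_prompt(messages):
#     prompt = ""
#     for message in messages:
#         prompt += f"<|{message.role.value}|>\n{message.content}<|end|>\n"
#     return prompt + "<|assistant|>\n"
#
# def completion_to_prompt(completion):
#     return f"<|user|>\n{completion}<|end|>\n<|assistant|>\n"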