# Spaces: Sleeping
import spaces | |
import gradio as gr | |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
from llama_index.core import StorageContext, load_index_from_storage, Settings | |
from llama_index.llms.huggingface import HuggingFaceLLM | |
import torch | |
from pydantic import BaseModel | |
# Directory that the persisted index is read back from.
PERSIST_DIR = './storage'

# Run on CUDA when torch reports it as available, otherwise fall back to CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Pydantic model whose only content is a ``model_config`` with an empty
# ``protected_namespaces`` tuple, intended to avoid the protected-namespace
# warning.
# NOTE(review): nothing visible in this file instantiates or subclasses
# ``Config`` -- confirm it actually has the intended effect.
class Config(BaseModel):
    model_config = dict(protected_namespaces=())
# @spaces.GPU(duration=240)
def setup():
    """Install the embedding model and the LLM on the global ``Settings`` object.

    Side effects:
        * ``Settings.embed_model`` is set to a ``HuggingFaceEmbedding`` for
          ``BAAI/bge-base-en-v1.5``, created with ``device=DEVICE``.
        * ``Settings.llm`` is set to a ``HuggingFaceLLM`` wrapping
          ``TinyLlama/TinyLlama-1.1B-Chat-v1.0`` with a 2048-token context
          window and at most 256 newly generated tokens.

    Returns:
        None.
    """
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="BAAI/bge-base-en-v1.5",
        device=DEVICE,
    )

    llm_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    Settings.llm = HuggingFaceLLM(
        model_name=llm_name,
        tokenizer_name=llm_name,
        context_window=2048,
        max_new_tokens=256,
        # temperature / top_k / top_p are sampling parameters: transformers
        # only applies them when do_sample=True. Without it generation is
        # greedy and these values are ignored (with a warning).
        generate_kwargs={
            "do_sample": True,
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 0.95,
        },
        device_map="auto",
    )


# Configure the models once at import time, before the index is loaded below.
setup()
# Load the existing index
# @spaces.GPU
def load_context():
    """Build a query engine from the index persisted under ``PERSIST_DIR``.

    Returns:
        The object produced by ``index.as_query_engine()`` for the index
        loaded from storage.
    """
    persisted = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    return load_index_from_storage(persisted).as_query_engine()
# Module-wide query engine; stays None until initialize_query_engine() runs.
query_engine = None


def initialize_query_engine():
    """Load the query engine and publish it in the module-level ``query_engine``."""
    global query_engine
    engine = load_context()
    query_engine = engine


# Initialize query engine at the start
initialize_query_engine()
# Chatbot response function
def chatbot_response(message, history):
    """Answer one chat message by querying the module-level query engine.

    Args:
        message: The user's message; passed unchanged to ``query_engine.query``.
        history: Prior conversation, part of the chat callback signature.
            It is not used when building the answer.

    Returns:
        The query result converted to ``str``.
    """
    # Fallback in case the engine was not populated during start-up.
    if query_engine is None:
        initialize_query_engine()
    return str(query_engine.query(message))
# Initialize Gradio interface
iface = gr.ChatInterface(
    fn=chatbot_response,
    # Keep the model named here in sync with the one configured in setup():
    # the app loads TinyLlama/TinyLlama-1.1B-Chat-v1.0, not Phi-3-mini.
    title="UESP Lore Chatbot: CPU bound version of TinyLlama-1.1B-Chat",
    description=(
        "Low quality and extremely slow version of the ones you can find on the github page: "
        "https://github.com/emarron/UESP-lore. I am not paying to have Llama3 on here."
    ),
    examples=["Who is Zaraphus?"],
    # NOTE(review): with caching enabled the example is presumably answered by
    # chatbot_response ahead of time, so the models must be ready by then --
    # confirm against the installed Gradio version.
    cache_examples=True,
)

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()