import spaces
import gradio as gr
import torch
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    Settings,
)
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
PERSIST_DIR = './storage'
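# NOTE: ./storage must already contain a persisted index. How it was built is
# an assumption; a minimal sketch (run once, in a separate script) might be:
#
#   from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
#   documents = SimpleDirectoryReader("lore_pages").load_data()
#   index = VectorStoreIndex.from_documents(documents)
#   index.storage_context.persist(persist_dir=PERSIST_DIR)
#
# "lore_pages" is a hypothetical source directory, not part of this repo.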
# Configure the global LlamaIndex settings
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Settings.embed_model = OpenAIEmbedding()
# Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
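# OpenAIEmbedding reads the OPENAI_API_KEY environment variable. Whichever
# embedding model is active here must be the same one the persisted index was
# built with, or query vectors will not match the stored vectors.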
Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=2048,
    max_new_tokens=256,
    # do_sample=True is required for temperature/top_k/top_p to take effect
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
    device_map="auto",
)
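# Meta-Llama-3-8B-Instruct is a gated repository: downloading the weights
# requires accepting the license on Hugging Face and authenticating, e.g. via
# the HF_TOKEN environment variable or `huggingface-cli login`.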
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
# Cross-encoder reranker: keeps the 5 best of the retrieved candidates
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-large", top_n=5
)
query_engine = index.as_query_engine(
    streaming=True, similarity_top_k=10, node_postprocessors=[rerank]
)
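# Two-stage retrieval: the index first returns the similarity_top_k nearest
# nodes by embedding similarity, then the reranker rescores each (query, node)
# pair jointly and keeps its top_n. similarity_top_k therefore needs to be
# larger than top_n, so the reranking step has candidates to discard.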
@spaces.GPU  # request ZeroGPU hardware for the inference call; no-op elsewhere
def chatbot_response(message, history):
    response = query_engine.query(message)
    return str(response)  # str() drains the streaming response into plain text
iface = gr.ChatInterface(
    fn=chatbot_response,
    title="UESP Lore Chatbot (currently running on Meta-Llama-3-8B-Instruct). It works 'okay'.",
    description="GitHub page for use cases, general information, local installs, etc.: https://github.com/emarron/UESP-lore",
    examples=["Who is Zaraphus?", "What is the relation between the Dragon Break and CHIM?", "What is the Lunar Lorkhan?"],
    cache_examples=True,  # example answers are generated once at startup and reused
)
if __name__ == "__main__":
    iface.launch()