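"""UESP Lore Chatbot (Hugging Face Space).

Retrieval-augmented generation over Elder Scrolls lore: a persisted
LlamaIndex vector index supplies candidate passages, a cross-encoder
reranker filters them, and Meta-Llama-3-8B-Instruct writes the answer,
streamed through a Gradio chat interface.
"""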
import spaces  # import before any CUDA initialization, as required on ZeroGPU Spaces

import gradio as gr
import torch
from llama_index.core import (
    Settings,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
# Directory where the prebuilt vector index was persisted
PERSIST_DIR = "./storage"

# Configure the global LlamaIndex settings (embedding model and LLM)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The embedding model must match the one used when the index was built.
# OpenAIEmbedding reads the API key from the OPENAI_API_KEY environment variable.
Settings.embed_model = OpenAIEmbedding()
# Local alternative (no API key needed):
# Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
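
# Generate answers locally with transformers; the weights are pulled from the
# Hugging Face Hub (Meta-Llama-3 is gated, so the Space needs an HF token with
# access to the model). device_map="auto" handles GPU placement.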
Settings.llm = HuggingFaceLLM(
model_name="meta-llama/Meta-Llama-3-8B-Instruct",
tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
context_window=2048,
max_new_tokens=256,
    # do_sample=True so that temperature/top_k/top_p actually take effect
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
device_map="auto",
)
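
# Rebuild the index from the vectors persisted on disk (no re-embedding at startup)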
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)

# Retrieve a pool of candidate chunks, then let the cross-encoder reranker keep
# the 5 most relevant; similarity_top_k is set larger than top_n so the reranker
# has candidates to filter.
rerank = SentenceTransformerRerank(model="BAAI/bge-reranker-large", top_n=5)
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=10,
    node_postprocessors=[rerank],
)
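
# @spaces.GPU requests a GPU for the duration of each call on ZeroGPU Spaces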
@spaces.GPU
def chatbot_response(message, history):
    # Stream partial answers to the UI as tokens are generated
    response = query_engine.query(message)
    text = ""
    for token in response.response_gen:
        text += token
        yield text
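
# Gradio chat UI; cached examples are generated once at startup and served instantly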
iface = gr.ChatInterface(
fn=chatbot_response,
    title="UESP Lore Chatbot (currently running on Meta-Llama-3-8B-Instruct). It works 'okay'.",
    description="GitHub page for use cases, general information, local installs, etc.: https://github.com/emarron/UESP-lore",
    examples=["Who is Zaraphus?", "What is the relation between the Dragon Break and CHIM?", "What is the Lunar Lorkhan?"],
cache_examples=True,
)
if __name__ == "__main__":
iface.launch()