import spaces
import gradio as gr
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import StorageContext, load_index_from_storage, Settings
from llama_index.llms.huggingface import HuggingFaceLLM
import torch
from pydantic import BaseModel
PERSIST_DIR = './storage'
# Run on the GPU when available, otherwise fall back to CPU
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Pydantic config to avoid protected namespace warning
class Config(BaseModel):
    model_config = {'protected_namespaces': ()}
# @spaces.GPU(duration=240)
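# Register the embedding model and the chat LLM on LlamaIndex's global Settings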
def setup():
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device=DEVICE)
    Settings.llm = HuggingFaceLLM(
        model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        context_window=2048,
        max_new_tokens=256,
        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
        device_map="auto",
    )
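
# Configure the models once at import time so they are ready before the first request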
setup()
# Load the existing index
# @spaces.GPU
def load_context():
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    query_engine = index.as_query_engine()
    return query_engine
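
# Module-level query engine, built once and reused across chat requests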
query_engine = None
def initialize_query_engine():
    global query_engine
    query_engine = load_context()
# Initialize query engine at the start
initialize_query_engine()
# Chatbot response function
@spaces.GPU
def chatbot_response(message, history):
    # Re-initialize lazily in case the engine was not set up in this worker
    if query_engine is None:
        initialize_query_engine()
    response = query_engine.query(message)
    return str(response)
# Initialize Gradio interface
iface = gr.ChatInterface(
    fn=chatbot_response,
    title="UESP Lore Chatbot: CPU-bound version running TinyLlama-1.1B-Chat",
    description=(
        "A low-quality and extremely slow version of the chatbots you can find on the GitHub page: "
        "https://github.com/emarron/UESP-lore. I am not paying to have Llama3 on here."
    ),
    examples=["Who is Zaraphus?"],
    cache_examples=True,
)
if __name__ == "__main__":
    iface.launch()