Spaces:

Cachoups
/

LoL_Lore

Sleeping

App Files Files Community

LoL_Lore / app.py

Cachoups

Update app.py

065778e verified 6 months ago

raw

history blame contribute delete

8.3 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	from huggingface_hub import login
	import re
	import pandas as pd
	from langchain.schema import Document
	from langchain.text_splitter import TokenTextSplitter
	from transformers import AutoTokenizer
	import copy
	from langchain_community.retrievers import BM25Retriever
	from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint
	"""
	For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
	"""
	# Hosted-inference client for the zephyr-7b-beta chat model; `ra` and
	# `respond` below both send their chat requests through it.
	client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
	# Champion table read from a path relative to the working directory. The
	# columns this file reads later are 'Story', 'Champion' and 'Role'.
	df1 = pd.read_csv("./data/champions_data_lol.csv")
	# Cast every story to str so the text splitter and BM25 pre-processing always
	# receive strings.
	# NOTE(review): missing stories presumably become the literal text "nan" here
	# and would be indexed as such - confirm against the CSV.
	df1['Story'] = df1['Story'].astype(str)
	# Pre-processing
	def preprocess_for_bm25(text):
	    """Normalise *text* into the lower-cased, space-separated form used for BM25.

	    The same function is applied to the indexed chunks and to incoming
	    queries, so both sides are tokenised identically.

	    Steps:
	      * an ellipsis is kept as a single ``...`` token; the typographic
	        character U+2026 is treated exactly like three ASCII dots,
	      * each of ``. , ! ? ( ) " '`` is surrounded by spaces so it no longer
	        sticks to the neighbouring word,
	      * runs of whitespace collapse to one space, the ends are trimmed and
	        the result is lower-cased.

	    Args:
	        text: The string to normalise.

	    Returns:
	        The normalised string (empty if *text* is empty or only whitespace).
	    """
	    # Fold the single-character ellipsis into its ASCII spelling first;
	    # otherwise "wait\u2026" stays one token and never matches "wait".
	    text = text.replace("\u2026", "...")

	    # Hide ellipses behind a placeholder so the punctuation pass below does
	    # not split them into three separate dots.
	    text = text.replace("...", " _ELLIPSIS_ ")

	    # Pad the remaining punctuation with spaces on both sides.
	    text = re.sub(r'([.,!?()"\'])', r' \1 ', text)

	    # Bring the ellipses back.
	    text = text.replace("_ELLIPSIS_", "...")

	    # Collapse whitespace, trim, lower-case.
	    return re.sub(r'\s+', ' ', text).strip().lower()

	"""Pre-processing"""
	# Wrap every champion row in a LangChain Document: the story is the text
	# that gets chunked, the champion name and role ride along as metadata.
	documents = [
	    Document(
	        page_content=record["Story"],
	        metadata={
	            "champion_name": record["Champion"],
	            "role": record["Role"],
	        },
	    )
	    for _, record in df1.iterrows()
	]

	"""Chunking"""

	# Repo id whose tokenizer measures chunk length.
	# NOTE(review): despite the constant's name this is the same id as the chat
	# model passed to InferenceClient, not a separate embedding model.
	EMBEDDING_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
	tokenizer_name = EMBEDDING_MODEL_NAME

	# Split on token counts: 150 tokens per chunk, neighbouring chunks share 15.
	_splitter_settings = {"chunk_size": 150, "chunk_overlap": 15}
	text_splitter = TokenTextSplitter.from_huggingface_tokenizer(
	    tokenizer=AutoTokenizer.from_pretrained(tokenizer_name),
	    **_splitter_settings,
	)

	# These chunks keep the original wording and are what the LLM is shown.
	chunks = text_splitter.split_documents(documents)

	# BM25 is built on an independent, normalised copy so that `chunks` keeps
	# the untouched text for generation.
	chunks_bm25 = copy.deepcopy(chunks)

	# Give each original chunk and its normalised twin the same positional
	# index; a BM25 hit can then be mapped back to the original chunk.
	for position, (original_chunk, lexical_chunk) in enumerate(zip(chunks, chunks_bm25)):
	    original_chunk.metadata["index"] = position
	    lexical_chunk.metadata["index"] = position
	    lexical_chunk.page_content = preprocess_for_bm25(lexical_chunk.page_content)

	# Lexical retriever configured with k = 4 results per query.
	bm25_retriever = BM25Retriever.from_documents(chunks_bm25, k=4)

	def retriever(query):
	    """Return the original chunks that BM25 ranks best for *query*.

	    The query goes through the same normalisation as the indexed text.
	    Each hit from the normalised index is then swapped for the chunk with
	    the same ``index`` metadata in ``chunks``, so the caller receives the
	    un-normalised wording.

	    Args:
	        query: The question to search for.

	    Returns:
	        A list of Document chunks, in the order BM25 returned them.
	    """
	    hits = bm25_retriever.invoke(preprocess_for_bm25(query))
	    return [chunks[hit.metadata['index']] for hit in hits]

	"""Chain"""

	# Leftovers from a LangChain-chain prototype, kept for reference only: the
	# commented imports and the string literal below are never executed.
	#from langchain_core.runnables.passthrough import RunnablePassthrough
	#from langchain.prompts import ChatPromptTemplate
	#from langchain_core.output_parsers.string import StrOutputParser
	# NOTE(review): in the code visible here HuggingFaceHub is referenced only
	# inside the disabled string literal below.
	from langchain_community.llms.huggingface_hub import HuggingFaceHub
	#import os
	#from langchain_core.runnables import RunnableLambda


	#prompt_template = ChatPromptTemplate.from_template(prompt)
	# Disabled experiment: a bare string expression, evaluated and discarded.
	"""llm = HuggingFaceHub(
	repo_id="HuggingFaceH4/zephyr-7b-beta",
	#repo_id="google-bert/bert-base-uncased",
	model_kwargs={
	"temperature": 0.1,
	"max_length": 5,
	"return_full_text": False
	}
	"""

	# Set the correct endpoint and task for the model

	def ra(user_question):
	    """Ask the chat model to rewrite *user_question* with clean grammar.

	    A short, low-temperature, non-streaming completion is requested. The
	    request passes ``'('`` as a stop sequence so the model cannot append a
	    parenthesised explanation after the corrected question.

	    Args:
	        user_question: The question exactly as the user typed it.

	    Returns:
	        The model's corrected question. If the model returns no choices, or
	        an empty or whitespace-only answer, *user_question* is returned
	        unchanged so callers always get a usable, non-empty query for any
	        non-empty input.
	    """
	    messages_q = [
	        {"role": "system", "content": "You are familiar with League of Legends lore. You help correct grammar and clarity without giving additional explanations."},
	        {"role": "user", "content": f"Fix any grammar or clarity issues in the following question. Only return the corrected question itself.\n\n{user_question}"},
	    ]
	    print(messages_q)
	    res = client.chat_completion(
	        messages_q,
	        max_tokens=30,
	        stream=False,
	        temperature=0.1,
	        stop=['('],
	    )

	    choices = res["choices"]
	    if not choices:
	        return user_question

	    # Strings are immutable, so the value can be returned directly; no copy
	    # is needed.
	    corrected = choices[0]["message"]["content"]
	    if not corrected or not corrected.strip():
	        # The model produced nothing usable: keep the user's own wording
	        # rather than handing an empty query to retrieval.
	        return user_question
	    return corrected

	# chain = RunnablePassthrough() \| RunnableLambda(ra) \| prompt_template \| client.chat_completion() \| StrOutputParser() for notebook

	"""-------------------------------------------------------------------"""
	def respond(
	    message,
	    history: list[tuple[str, str]],
	    max_tokens=200,
	    temperature=0.1,
	    top_p=None,
	):
	    """Stream an answer to *message* grounded in retrieved lore chunks.

	    The question is first rewritten by ``ra``, the rewritten question is
	    used to fetch context with ``retriever``, and the chat model is then
	    called in streaming mode with the earlier turns, the context and the
	    question.

	    Args:
	        message: The user's latest question.
	        history: Earlier turns as (user, assistant) pairs; empty halves are
	            skipped.
	        max_tokens: Upper bound on generated tokens. Defaults to 200.
	        temperature: Sampling temperature. Defaults to 0.1.
	        top_p: Nucleus-sampling value forwarded to the client. Defaults to
	            None.

	    The three sampling parameters have defaults because
	    ``gr.ChatInterface(respond)`` is created without ``additional_inputs``
	    and therefore supplies only ``message`` and ``history``.

	    Yields:
	        The answer accumulated so far, once per received non-empty token.
	    """
	    # Fall back to the raw message if the rewrite step returned nothing.
	    new_query = ra(message) or message
	    print("old: ", new_query)
	    # Drop a dangling "(" if the stop sequence leaked into the rewrite;
	    # removesuffix is safe on an empty string, unlike indexing [-1].
	    new_query = new_query.removesuffix("(") or message
	    print("new: ", new_query)

	    # NOTE(review): this detailed instruction block is only printed; the
	    # request below sends the shorter one-sentence system prompt. Confirm
	    # which of the two is intended before wiring this one in.
	    system_message = """
	You are an expert in League of Legends (LoL) lore. You will only answer questions related to the champions and their stories within the game.=
	Instructions:
	1. Only use the context provided below to answer the question. Reference the context directly for accuracy.
	2. If the question is outside the scope of League of Legends lore, respond: "Please ask something related to League of Legends lore."
	3. If the provided context does not provide a clear answer, respond: "I'm unsure based on the provided context."

	"""
	    print(system_message)

	    messages = [{"role": "system", "content": "You are an expert in League of Legends (LoL) lore. You will only answer questions related to the champions and their stories within the game."}]
	    for turn in history:
	        if turn[0]:
	            messages.append({"role": "user", "content": turn[0]})
	        if turn[1]:
	            messages.append({"role": "assistant", "content": turn[1]})

	    # Retrieve once and reuse the same text for the request and the log.
	    prompt = "Context: " + str(retriever(new_query)) + "\n\nQuestion: " + new_query + "\n\nAnswer: "
	    messages.append({"role": "user", "content": prompt})
	    print(prompt)

	    response = ""
	    for chunk in client.chat_completion(
	        messages,
	        max_tokens=max_tokens,
	        stream=True,
	        temperature=temperature,
	        top_p=top_p,
	    ):
	        if not chunk.choices:
	            continue
	        token = chunk.choices[0].delta.content
	        # A stream chunk may carry no text (content is None); appending it
	        # would raise TypeError.
	        if token:
	            response += token
	            yield response


	"""
	For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
	"""
	# Introductory text rendered above the chat box.
	_INTRO_MARKDOWN = """
	# League of Legends Lore Chatbot
	Welcome to the LoL Lore Chatbot! 🏆
	Here, you can ask questions about League of Legends champions and their stories.

	Example Question:
	Why does Kayn have different forms?
	"""

	# Page layout: the introduction followed by a chat interface that sends
	# each user message to `respond`.
	with gr.Blocks() as demo:
	    gr.Markdown(_INTRO_MARKDOWN)
	    chat = gr.ChatInterface(respond)


	# Start the app only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()