# from unsloth import FastLanguageModel
# import torch
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core.indices.loading import load_index_from_storage
from openai import OpenAI
import gradio as gr
# model,tokenizer = FastLanguageModel.from_pretrained('./unified_model')
# client = FastLanguageModel.for_inference(model)
# Directory holding the llama_index storage persisted for the AILA documents
persist_dir = 'aila_indices'

# Initialize OpenAI client (reads the OPENAI_API_KEY environment variable;
# llama_index's default LLM and embedding model rely on the same key)
client = OpenAI()

# Rebuild the index from persisted storage and expose it as a query engine
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine(similarity_top_k=8)
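
# The 'aila_indices' folder is assumed to contain an index persisted earlier with
# llama_index. A minimal sketch of how such an index could be built, assuming a
# hypothetical local "docs/" folder with the source documents (not executed here):
#
#   from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
#
#   documents = SimpleDirectoryReader("docs/").load_data()        # load raw documents
#   index = VectorStoreIndex.from_documents(documents)            # embed and index them
#   index.storage_context.persist(persist_dir="aila_indices")     # write to disk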
# def generate_response(instruction,chat_history):
# """Generates a response using your fine-tuned model."""
# # FastLanguageModel.for_inference(model) # Enable native 2x faster inference within the function
# prompt = f"""### Instruction:
# Answer the following question.
# ### Question:
# {instruction}
# Provide a unique, concise, and non-repetitive answer.
# ### Answer:"""
# inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
# with torch.no_grad():
# outputs = model.generate(**inputs,early_stopping=True,min_length=50,length_penalty=2,do_sample=True,max_new_tokens=300,
# top_p=0.95,
# top_k=50,
# temperature=0.7,
# repetition_penalty=1.2,
# num_return_sequences=1
# )
# response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# response = response.split("### Answer:")[-1]
# return response

def generate_response_rag_index(instruction, chat_history):
    """Generate an answer by querying the RAG index (chat_history is currently unused)."""
    response = query_engine.query(instruction)
    return str(response)
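
# Note: query_engine.query() ignores chat_history, so follow-up questions are answered
# without conversational context. A minimal history-aware sketch, assuming llama_index's
# built-in chat engine (an alternative, not the approach used above):
#
#   chat_engine = index.as_chat_engine(chat_mode="condense_question")
#
#   def generate_response_rag_chat(instruction, chat_history):
#       return str(chat_engine.chat(instruction))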

def update_chat_history(chat_history, user_message, bot_message):
    """Update chat history to maintain relevance and avoid excessive growth."""
    chat_history['user'].append(user_message)
    chat_history['bot'].append(bot_message)
    # Keep only the last N interactions
    if len(chat_history['user']) > 5:
        chat_history['user'] = chat_history['user'][-5:]
        chat_history['bot'] = chat_history['bot'][-5:]
    return chat_history
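
# Example of the trimming behaviour (hypothetical values): after six exchanges,
# only the five most recent remain.
#
#   history = {"user": [], "bot": []}
#   for i in range(6):
#       history = update_chat_history(history, f"q{i}", f"a{i}")
#   # history["user"] is now ["q1", "q2", "q3", "q4", "q5"]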

def chatbot(input_text, chat_history):
    # Rebuild the bounded history dict from the Gradio chat history pairs
    messages = {
        "user": [],
        "bot": [],
    }
    for user_msg, bot_msg in chat_history:
        messages["user"].append(user_msg)
        messages["bot"].append(bot_msg)

    # bot_response = generate_response(input_text, messages)
    bot_response = generate_response_rag_index(input_text, messages)

    chat_history.append(("User: " + input_text, bot_response))
    messages = update_chat_history(messages, input_text, bot_response)

    # Clear the textbox and return the updated history for the Chatbot component
    return "", chat_history

with gr.Blocks() as demo:
    gr.Markdown('## AILA INTERFACE DEMO')

    with gr.Row():
        gr.Image(value="up_2017_logo_en.png", interactive=False, label="Upatras Logo", width=150, height=100)
        gr.Image(value="aila_new.png", interactive=False, label="AILA project Logo", width=150, height=100)
        gr.Image(value="banner-horizontal-default-en.png", interactive=False, label="AUTH Logo", width=150, height=100)

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            lines=1
        )
        submit_button = gr.Button('Submit')

    chat_history = gr.Chatbot()

    submit_button.click(
        chatbot,
        inputs=[user_input, chat_history],
        outputs=[user_input, chat_history]
    )

demo.launch()