from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, SummaryIndex
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import torch
import spaces
import subprocess
import sys
import os

# Install transformers from source so the Phi-3 architecture used below is supported.
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'git+https://github.com/huggingface/transformers', '-U'])

# Install flash-attn, skipping the CUDA build step.
# Merging os.environ keeps PATH and the rest of the environment visible to pip.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

# Load every file in ./data into LlamaIndex documents.
documents = SimpleDirectoryReader("./data").load_data()

summary_index = SummaryIndex.from_documents(documents)
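# Note: summary_index is built but not queried below; if document-level summaries
# are wanted, it could back its own engine via summary_index.as_query_engine().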


def messages_to_prompt(messages):
    """Flatten chat messages into the Phi-3 chat template."""
    prompt = ""
    system_found = False
    for message in messages:
        if message.role == "system":
            prompt += f"<|system|>\n{message.content}<|end|>\n"
            system_found = True
        elif message.role == "user":
            prompt += f"<|user|>\n{message.content}<|end|>\n"
        elif message.role == "assistant":
            prompt += f"<|assistant|>\n{message.content}<|end|>\n"
        else:
            # Fall back to treating unknown roles as user turns.
            prompt += f"<|user|>\n{message.content}<|end|>\n"

    # Cue the model to generate the assistant's reply.
    prompt += "<|assistant|>\n"

    # Prepend a default system prompt if the conversation did not include one.
    if not system_found:
        prompt = (
            "<|system|>\nYou are a helpful AI research assistant built by Justin. "
            "You only answer from the context provided.<|end|>\n" + prompt
        )

    return prompt
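# For illustration, a single user turn with no system message produces a prompt
# shaped like this (the question text is hypothetical):
#   <|system|>
#   You are a helpful AI research assistant built by Justin. You only answer from the context provided.<|end|>
#   <|user|>
#   What does the report conclude?<|end|>
#   <|assistant|>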


llm = HuggingFaceLLM(
    model_name="justinj92/phi3-orpo",
    model_kwargs={
        "trust_remote_code": True,
        "torch_dtype": torch.bfloat16,
    },
    generate_kwargs={"do_sample": True, "temperature": 0.7},
    tokenizer_name="justinj92/phi3-orpo",
    query_wrapper_prompt=(
        "<|system|>\n"
        "You are a helpful AI research assistant built by Justin. You only answer from the context provided.<|end|>\n"
        "<|user|>\n"
        "{query_str}<|end|>\n"
        "<|assistant|>\n"
    ),
    messages_to_prompt=messages_to_prompt,
    is_chat_model=True,
)

# Register the LLM and embedding model globally.
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# ServiceContext also carries the chunk size used when splitting documents.
service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=Settings.embed_model,
)

# Embed the documents into a vector index and expose it as a query engine.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
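# Illustrative sanity check (hypothetical question; the answer depends on ./data):
# print(query_engine.query("Summarize the key findings."))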


# ZeroGPU: a GPU is allocated only while a request is being handled.
@spaces.GPU
def predict(message, history):
    response = query_engine.query(message)
    return str(response)


import gradio as gr

gr.ChatInterface(predict).launch(share=True)