# Hugging Face Spaces page header ("Spaces: Sleeping") captured during export;
# kept as a comment so the file remains valid Python.
from huggingface_hub import InferenceClient, login
from transformers import AutoTokenizer
from langchain.chat_models import ChatOpenAI
import os, sys, json
import gradio as gr
from langchain.evaluation import load_evaluator
# NOTE(review): this deliberately shadows the builtin print with pprint for
# the whole module — every later print() call goes through pprint.
from pprint import pprint as print

# Access token with permission to access the model (PRO subscription required).
#HUGGINGFACEHUB_API_TOKEN = os.getenv("HF_ACCESS_READ")
OAI_API_KEY = os.getenv("OPENAI_API_KEY")
login(token=os.environ["HF_ACCESS_READ"])

# Tokenizer used only to build the chat prompt for the Llama-2 model.
print ("Tokenizer")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf")

# Inference client pointed at the hosted Llama-2-70b chat endpoint.
print ("Inf.Client")
client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
def generate(text, history):
    """Answer *text* using a fixed RAG context, then self-evaluate the answer.

    Parameters:
        text: The user's question from the Gradio chat box.
        history: Chat history supplied by gr.ChatInterface (unused here).

    Returns:
        The stripped model answer (str).
    """
    # RAG: hard-coded context for now; later this should come from a vector store.
    context="Nuremberg is the second-largest city of the German state of Bavaria after its capital Munich, and its 541,000 inhabitants make it the 14th-largest city in Germany. On the Pegnitz River (from its confluence with the Rednitz in Fürth onwards: Regnitz, a tributary of the River Main) and the Rhine–Main–Danube Canal, it lies in the Bavarian administrative region of Middle Franconia, and is the largest city and the unofficial capital of Franconia. Nuremberg forms with the neighbouring cities of Fürth, Erlangen and Schwabach a continuous conurbation with a total population of 812,248 (2022), which is the heart of the urban area region with around 1.4 million inhabitants,[4] while the larger Nuremberg Metropolitan Region has approximately 3.6 million inhabitants. The city lies about 170 kilometres (110 mi) north of Munich. It is the largest city in the East Franconian dialect area."
    # BUG FIX: the original template interpolated an undefined name {question},
    # which raised NameError on the first call; the question is the `text` param.
    prompt = f"""Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context} Question: {text}"""
    # Wrap the composed request in the model's chat template (module-level tokenizer).
    payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
    res = client.text_generation(
        payload,
        do_sample=True,
        return_full_text=False,
        max_new_tokens=2048,
        top_p=0.9,
        temperature=0.6,
    )
    # Evaluation: custom "explain like I'm five" criterion (typos fixed in the
    # criterion prompt so the judge LLM reads proper English).
    custom_criterion = {"eli5": "Is the output explained in a way that a 5 year old would understand it?"}
    # BUG FIX: requires_reference=True demands a `reference=` argument that was
    # never supplied, making evaluate_strings raise; evaluate without a reference.
    eval_result = evaluator.evaluate_strings(prediction=res.strip(), input=text, criteria=custom_criterion)
    print ("eval_result:............ ")
    print(eval_result)
    return res.strip()
########################################
# Evaluation
########################################
# GPT-4 acts as the judge LLM for the LangChain criteria evaluator,
# which `generate` uses to score each answer.
evaluation_llm = ChatOpenAI(model="gpt-4")
# Create the evaluator with the built-in "conciseness" criterion as default.
evaluator = load_evaluator("criteria", criteria="conciseness", llm=evaluation_llm)
################################################
# GUI
################################################
# Description shown at the top of the GUI.
################################################
# Chat widget plus the streaming chat interface shown in the "Chatbot" tab.
chatbot_stream = gr.Chatbot()
chat_interface_stream = gr.ChatInterface(
    fn=generate,
    title="ChatGPT vom LI",
    theme="soft",
    chatbot=chatbot_stream,
    retry_btn="🔄 Wiederholen",
    undo_btn="↩️ Letztes löschen",
    clear_btn="🗑️ Verlauf löschen",
    submit_btn="Abschicken",
)

with gr.Blocks() as demo:
    with gr.Tab("Chatbot"):
        #chatbot_stream.like(vote, None, None)
        # NOTE(review): calling .queue().launch() here starts the ChatInterface
        # app directly; `demo` itself is never launched. Confirm whether
        # chat_interface_stream.render() + demo.launch() was intended.
        chat_interface_stream.queue().launch()