import gradio as gr
import requests
import re
# -----------------------------
# 1. Configure the open-source LLM API endpoint
# For demonstration, we use the hosted Inference API on Hugging Face,
# which is free to use up to a rate limit.
# -----------------------------
# Example: we'll use an OpenAssistant model endpoint on HF. Many models
# listed under the Hugging Face "Models" section expose this free
# Inference API.
API_URL = "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
# API_URL = "https://api-inference.huggingface.co/models/OpenAssistant/falcon-7b-sft-mix-2000"
headers = {}  # anonymous requests work, but are heavily rate-limited
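# If you hit the anonymous rate limit, an access token raises it. A minimal
# sketch, assuming the token lives in an HF_TOKEN environment variable (the
# variable name is our choice, not something the API requires):
import os

if os.environ.get("HF_TOKEN"):
    headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}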
# -----------------------------
# 2. Define a function to query the model
# -----------------------------
def query_model(prompt: str) -> str:
    """
    Sends the prompt to the Hugging Face Inference API and returns the
    model's response.
    """
    # The payload format for text generation can vary by model. We'll try
    # the common shape for text-generation endpoints:
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 200,  # limit response length
            "temperature": 0.7,     # moderate creativity
        },
    }
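    # Hypothetical tweak (an assumption, not in the original): the HF
    # text-generation API also accepts "return_full_text": False inside
    # "parameters", which strips the echoed prompt on models that honor it.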
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    if response.status_code == 200:
        model_output = response.json()
        # The response key ("generated_text" or "text") varies by model.
        if isinstance(model_output, dict) and "generated_text" in model_output:
            return model_output["generated_text"]
        elif isinstance(model_output, list) and len(model_output) > 0:
            # Some endpoints return a list of dicts
            return model_output[0].get("generated_text", "")
        else:
            return "Error: Unexpected model output format."
    else:
        return f"Error {response.status_code}: {response.text}"
# -----------------------------
# 3. Define a simple evaluation function
# This is a naive "keyword and structure" based scoring for demonstration.
# -----------------------------
def evaluate_response(response: str) -> dict:
    """
    Rates the response on a 0-5 scale for:
    1) Relevance (R)
    2) Depth (D)
    3) Clarity (C)
    4) References (E)
    5) Overall Quality (Q)
    Returns a dict with the individual scores and the total.
    """
    # We'll take a very simplistic approach:
    # Relevance: presence of 'remote work' and a mention of 'software engineer(s)'
    relevance = 5 if ("remote work" in response.lower() and "software engineer" in response.lower()) else 0
    # Depth: reward longer answers
    word_count = len(response.split())
    depth = 5 if word_count > 150 else (4 if word_count > 80 else 0)
    # Clarity: reward answers structured into multiple paragraphs
    paragraphs = response.strip().split("\n\n")
    clarity = 5 if len(paragraphs) >= 2 else 0
    # References: look for 'reference', 'source', 'citation', or a URL
    if re.search(r"reference|source|citation|http", response, re.IGNORECASE):
        references = 5
    else:
        references = 0
    # Overall Quality: a naive combination.
    # We penalize text that is too short or obviously incomplete.
    if "..." in response[-10:]:
        # A trailing ellipsis suggests the generation was cut off.
        overall = 0
    else:
        overall = 5 if (relevance >= 4 and depth >= 4 and references >= 4) else 0
    # Summation
    total_score = relevance + depth + clarity + references + overall
    return {
        "Relevance": relevance,
        "Depth": depth,
        "Clarity": clarity,
        "References": references,
        "Overall": overall,
        "Total": total_score
    }
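# Example of the rubric on a toy answer (illustrative, hand-computed against
# the rules above):
#
#   evaluate_response(
#       "Remote work helps software engineers focus.\n\nSource: https://example.com"
#   )
#   # -> {'Relevance': 5, 'Depth': 0, 'Clarity': 5, 'References': 5,
#   #     'Overall': 0, 'Total': 15}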
# -----------------------------
# 4. Define the Gradio interface function
# This is the function that runs when the user clicks "Generate & Evaluate".
# -----------------------------
def generate_and_evaluate(prompt: str):
    if not prompt.strip():
        return "Please enter a prompt.", {}
    # 1) Get the LLM response
    llm_response = query_model(prompt)
    # 2) Evaluate it
    scores = evaluate_response(llm_response)
    return llm_response, scores
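# The returned tuple maps positionally onto the outputs wired to the button
# below: llm_response -> response_output (Textbox), scores -> score_output (JSON).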
# -----------------------------
# 5. Build the Gradio UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Prompt Evaluator")
    gr.Markdown(
        "Enter a prompt (the demo rubric expects one about remote work for software engineers). "
        "The model will generate a response and our auto-evaluator will score it."
    )
    prompt_input = gr.Textbox(
        label="Enter your prompt here",
        placeholder="E.g., 'Write a short report on the benefits of remote work for software engineers...'",
        lines=3
    )
    generate_button = gr.Button("Generate & Evaluate")
    response_output = gr.Textbox(
        label="LLM Response",
        lines=10
    )
    score_output = gr.JSON(
        label="Evaluation Scores",
        visible=True
    )
    generate_button.click(
        fn=generate_and_evaluate,
        inputs=[prompt_input],
        outputs=[response_output, score_output]
    )
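# Optional: calling demo.queue() before launching serializes concurrent
# requests, which helps here because each model call can take several seconds.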
# -----------------------------
# 6. Launch
# -----------------------------
if __name__ == "__main__":
    demo.launch()
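    # Optional (assumption: running locally rather than on Spaces, where
    # hosting is automatic): demo.launch(share=True) would instead create a
    # temporary public link.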