Spaces:

decodingdatascience
/

LLMGenerationControls

Running

App Files Files Community

LLMGenerationControls / app.py

decodingdatascience

Update app.py

4a97c46 verified 11 days ago

raw

history blame contribute delete

11.4 kB

	import os
	import gradio as gr
	from openai import OpenAI


	# ============================================================
	# Hugging Face Spaces Secret
	# ============================================================
	# Add this in Hugging Face:
	# Space → Settings → Secrets → New secret
	#
	# Name: OPENAI_API_KEY
	# Value: your OpenAI API key
	# ============================================================


	# Model identifiers listed in the "Generation Model" dropdown.
	GENERATION_MODELS = ["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "gpt-4o", "gpt-4o-mini"]

	# Model identifiers listed in the "Reasoning Model" dropdown.
	REASONING_MODELS = ["gpt-5.5", "gpt-5.1", "gpt-5-mini", "gpt-5-pro", "o3", "o4-mini"]

	# Preselected model for each tab; an environment variable overrides the
	# built-in default.
	DEFAULT_GENERATION_MODEL = os.environ.get("OPENAI_GENERATION_MODEL", "gpt-4.1")
	DEFAULT_REASONING_MODEL = os.environ.get("OPENAI_REASONING_MODEL", "gpt-5.5")


	def get_openai_client():
	    """
	    Build an OpenAI client from the OPENAI_API_KEY environment variable.

	    Surrounding whitespace (for example a trailing newline left by a
	    copy/paste into the secret field) is stripped before use, and a value
	    that is empty after stripping is treated the same as a missing one.

	    Returns:
	        An ``OpenAI`` client configured with the cleaned API key.

	    Raises:
	        ValueError: If OPENAI_API_KEY is unset, empty, or only whitespace.
	    """
	    # `or ""` turns an unset variable (None) into a string so .strip() is safe.
	    api_key = (os.getenv("OPENAI_API_KEY") or "").strip()

	    if not api_key:
	        raise ValueError(
	            "OPENAI_API_KEY is missing. "
	            "Please add it in Hugging Face Spaces → Settings → Secrets."
	        )

	    return OpenAI(api_key=api_key)


	def extract_output_text(response):
	    """
	    Return the text carried by an OpenAI Responses API response object.

	    The ``output_text`` attribute is returned as-is when it exists and is
	    non-empty. Otherwise every non-empty ``text`` attribute found on the
	    ``content`` entries of the ``output`` items is collected, joined with
	    newlines, and stripped. An empty string means nothing was found.
	    """
	    direct_text = getattr(response, "output_text", None)
	    if direct_text:
	        return direct_text

	    # Fallback: walk output -> content and keep only parts that carry text.
	    pieces = [
	        part.text
	        for item in (getattr(response, "output", None) or [])
	        for part in (getattr(item, "content", None) or [])
	        if getattr(part, "text", None)
	    ]
	    return "\n".join(pieces).strip()


	def run_generation_model(
	    prompt,
	    model,
	    system_message,
	    temperature,
	    top_p,
	    max_output_tokens,
	    show_settings,
	):
	    """
	    Send a prompt to a standard (non-reasoning) generation model.

	    The request is made through ``client.responses.create`` with the system
	    message passed as ``instructions`` and with ``temperature``, ``top_p``
	    and ``max_output_tokens`` coerced to float, float and int.
	    frequency_penalty, presence_penalty and reasoning settings are
	    deliberately left out of the request.

	    Returns:
	        The generated text, or "No output generated." when nothing came
	        back. When ``show_settings`` is truthy the text is prefixed with a
	        summary of the settings used. Any exception is caught and returned
	        as a string starting with "Error:".
	    """
	    try:
	        response = get_openai_client().responses.create(
	            model=model,
	            instructions=system_message,
	            input=prompt,
	            temperature=float(temperature),
	            top_p=float(top_p),
	            max_output_tokens=int(max_output_tokens),
	        )
	        answer = extract_output_text(response) or "No output generated."

	        if not show_settings:
	            return answer

	        header = f"""GENERATION SETTINGS
	-------------------
	Model: {model}
	Temperature: {temperature}
	Top P: {top_p}
	Max Output Tokens: {max_output_tokens}

	Note:
	Frequency penalty and presence penalty are intentionally not sent in this app
	to avoid unsupported-parameter errors.

	OUTPUT
	------
	"""
	        return header + answer

	    except Exception as exc:
	        # Boundary handler: the message is returned instead of raised so the
	        # caller receives displayable text.
	        return f"Error:\n{exc!s}"


	def get_safe_reasoning_effort(model, selected_effort):
	    """
	    Map the selected reasoning effort to one this app will send for `model`.

	    Rules applied:
	    - "gpt-5-pro" always gets "high", whatever was selected.
	    - "gpt-5.1" accepts "none", "low", "medium" or "high".
	    - Every other model accepts "low", "medium" or "high".
	    - A selection outside the accepted values becomes "medium".
	    """
	    if model == "gpt-5-pro":
	        return "high"

	    # Only gpt-5.1 is allowed the extra "none" level.
	    if model == "gpt-5.1":
	        accepted = ("none", "low", "medium", "high")
	    else:
	        accepted = ("low", "medium", "high")

	    if selected_effort in accepted:
	        return selected_effort
	    return "medium"


	def run_reasoning_model(
	    prompt,
	    model,
	    reasoning_effort,
	    max_output_tokens,
	    show_settings,
	):
	    """
	    Send a prompt to a reasoning model.

	    The selected effort is first passed through
	    ``get_safe_reasoning_effort`` and the result is sent as
	    ``reasoning.effort`` via ``client.responses.create``, together with
	    ``max_output_tokens`` coerced to int. temperature, top_p,
	    frequency_penalty and presence_penalty are deliberately left out of
	    the request.

	    Returns:
	        The model's text, or a hint to raise Max Output Tokens when no
	        visible text came back. When ``show_settings`` is truthy the text
	        is prefixed with a summary showing both the selected and the
	        actually used effort. Any exception is caught and returned as a
	        string starting with "Error:".
	    """
	    try:
	        client = get_openai_client()
	        effective_effort = get_safe_reasoning_effort(model, reasoning_effort)

	        response = client.responses.create(
	            model=model,
	            input=prompt,
	            reasoning={"effort": effective_effort},
	            max_output_tokens=int(max_output_tokens),
	        )

	        answer = extract_output_text(response) or (
	            "No visible output generated. "
	            "Try increasing Max Output Tokens because reasoning models use "
	            "some tokens internally before producing the final answer."
	        )

	        if not show_settings:
	            return answer

	        header = f"""REASONING SETTINGS
	------------------
	Model: {model}
	Selected Reasoning Effort: {reasoning_effort}
	Used Reasoning Effort: {effective_effort}
	Max Output Tokens: {max_output_tokens}

	Note:
	Temperature, top_p, frequency penalty, and presence penalty are intentionally
	not sent for reasoning models to avoid unsupported-parameter errors.

	OUTPUT
	------
	"""
	        return header + answer

	    except Exception as exc:
	        # Boundary handler: the message is returned instead of raised so the
	        # caller receives displayable text.
	        return f"Error:\n{exc!s}"


	# Custom stylesheet passed to demo.launch(css=CSS). It caps and centers the
	# page container, centers the title block, styles the helper box, and
	# forces a monospace font on textareas inside elements with the
	# "output-box" class.
	CSS = """
	.gradio-container {
	max-width: 1200px !important;
	margin: auto !important;
	}

	.main-title {
	text-align: center;
	margin-bottom: 20px;
	}

	.helper-box {
	padding: 14px;
	border-radius: 12px;
	background: #f7f7f8;
	border: 1px solid #e5e7eb;
	margin-bottom: 16px;
	}

	.output-box textarea {
	font-family: monospace !important;
	}
	"""


	# UI definition: a title, a helper box, and one tab per model family. Each
	# tab pairs a column of controls with a column holding the output textbox.
	with gr.Blocks() as demo:
	    # Page title and tagline.
	    gr.Markdown(
	        """
	<div class="main-title">

	# LLM Model Controls Demo

	Part of Decoding Data Science AI Residency A clean Gradio app for testing generation models and reasoning models separately.

	</div>
	"""
	    )

	    # Helper box shown under the title.
	    gr.Markdown(
	        """
	<div class="helper-box">

	<b>Setup:</b> Prompting is not Enough



	</div>
	"""
	    )

	    # ------------------------------------------------------------
	    # Tab 1: generation models
	    # ------------------------------------------------------------
	    with gr.Tab("Generation Models"):
	        gr.Markdown(
	            """
	Use this tab for normal text generation tasks like LinkedIn posts, summaries, explanations, rewriting, and simple Q&A.
	"""
	        )

	        # Use the configured default only when the dropdown actually lists it.
	        if DEFAULT_GENERATION_MODEL in GENERATION_MODELS:
	            _gen_initial_model = DEFAULT_GENERATION_MODEL
	        else:
	            _gen_initial_model = "gpt-4.1"

	        with gr.Row():
	            # Controls column.
	            with gr.Column(scale=1):
	                gen_prompt = gr.Textbox(
	                    label="Prompt",
	                    lines=7,
	                    value="Write a short LinkedIn post explaining why business leaders should learn AI. Maximum 120 words.",
	                )
	                gen_model = gr.Dropdown(
	                    label="Generation Model",
	                    choices=GENERATION_MODELS,
	                    value=_gen_initial_model,
	                )
	                gen_system_message = gr.Textbox(
	                    label="System Message",
	                    lines=3,
	                    value="You are a helpful AI instructor. Keep answers clear and practical.",
	                )
	                gen_temperature = gr.Slider(
	                    minimum=0.0, maximum=2.0, step=0.01, value=0.7, label="Temperature"
	                )
	                gen_top_p = gr.Slider(
	                    minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Top P"
	                )
	                gen_max_output_tokens = gr.Slider(
	                    minimum=50, maximum=4000, step=50, value=500, label="Max Output Tokens"
	                )
	                gen_show_settings = gr.Checkbox(value=True, label="Show Settings")
	                gen_button = gr.Button("Generate", variant="primary")

	            # Output column.
	            with gr.Column(scale=1):
	                gen_output = gr.Textbox(
	                    label="Output", lines=22, elem_classes=["output-box"]
	                )

	        # The inputs are listed in the parameter order of run_generation_model.
	        gen_button.click(
	            fn=run_generation_model,
	            inputs=[
	                gen_prompt, gen_model, gen_system_message, gen_temperature,
	                gen_top_p, gen_max_output_tokens, gen_show_settings,
	            ],
	            outputs=gen_output,
	        )

	    # ------------------------------------------------------------
	    # Tab 2: reasoning models
	    # ------------------------------------------------------------
	    with gr.Tab("Reasoning Models"):
	        gr.Markdown(
	            """
	Use this tab for complex tasks like architecture decisions, agent planning, debugging, code reasoning, and trade-off analysis.
	"""
	        )

	        # Use the configured default only when the dropdown actually lists it.
	        if DEFAULT_REASONING_MODEL in REASONING_MODELS:
	            _reason_initial_model = DEFAULT_REASONING_MODEL
	        else:
	            _reason_initial_model = "gpt-5.5"

	        with gr.Row():
	            # Controls column.
	            with gr.Column(scale=1):
	                reason_prompt = gr.Textbox(
	                    label="Prompt",
	                    lines=9,
	                    value="""A telecom company wants to build an AI customer support assistant.

	They have:
	- 50,000 past support tickets
	- A FAQ website
	- Billing policies
	- A small developer team

	Should they start with:
	1. Simple prompt-based chatbot
	2. RAG chatbot
	3. Fine-tuning
	4. Agent with tools

	Give a practical recommendation with trade-offs.""",
	                )
	                reason_model = gr.Dropdown(
	                    label="Reasoning Model",
	                    choices=REASONING_MODELS,
	                    value=_reason_initial_model,
	                )
	                reason_effort = gr.Radio(
	                    label="Reasoning Effort",
	                    choices=["none", "low", "medium", "high"],
	                    value="medium",
	                )
	                reason_max_output_tokens = gr.Slider(
	                    minimum=100, maximum=12000, step=100, value=2000, label="Max Output Tokens"
	                )
	                reason_show_settings = gr.Checkbox(value=True, label="Show Settings")
	                reason_button = gr.Button("Reason", variant="primary")

	            # Output column.
	            with gr.Column(scale=1):
	                reason_output = gr.Textbox(
	                    label="Output", lines=22, elem_classes=["output-box"]
	                )

	        # The inputs are listed in the parameter order of run_reasoning_model.
	        reason_button.click(
	            fn=run_reasoning_model,
	            inputs=[
	                reason_prompt, reason_model, reason_effort,
	                reason_max_output_tokens, reason_show_settings,
	            ],
	            outputs=reason_output,
	        )


	if __name__ == "__main__":
	    # Listen on all interfaces; the port comes from PORT when set, else 7860.
	    # NOTE(review): theme= and css= are given to launch() rather than to
	    # gr.Blocks() — confirm the installed Gradio version accepts them here.
	    launch_options = {
	        "theme": gr.themes.Soft(),
	        "css": CSS,
	        "server_name": "0.0.0.0",
	        "server_port": int(os.environ.get("PORT", 7860)),
	        "debug": False,
	        "share": False,
	    }
	    demo.launch(**launch_options)