# Page header captured with the file (not part of the program):
#   decodingdatascience's picture
#   Update app.py -- 4a97c46 verified
import os
import gradio as gr
from openai import OpenAI
# ============================================================
# Hugging Face Spaces Secret
# ============================================================
# Add this in Hugging Face:
# Space → Settings → Secrets → New secret
#
# Name: OPENAI_API_KEY
# Value: your OpenAI API key
# ============================================================
# Model names listed in the "Generation Models" dropdown.
GENERATION_MODELS = [
    "gpt-4.1",
    "gpt-4.1-mini",
    "gpt-4.1-nano",
    "gpt-4o",
    "gpt-4o-mini",
]

# Model names listed in the "Reasoning Models" dropdown.
REASONING_MODELS = [
    "gpt-5.5",
    "gpt-5.1",
    "gpt-5-mini",
    "gpt-5-pro",
    "o3",
    "o4-mini",
]

# Preselected models; each one can be overridden with an environment variable.
DEFAULT_GENERATION_MODEL = os.environ.get("OPENAI_GENERATION_MODEL", "gpt-4.1")
DEFAULT_REASONING_MODEL = os.environ.get("OPENAI_REASONING_MODEL", "gpt-5.5")
def get_openai_client():
    """
    Build an OpenAI client from the OPENAI_API_KEY environment variable.

    Leading/trailing whitespace is stripped from the key so that a secret
    pasted with a stray space or newline is not handed to the client as-is,
    and a key consisting only of whitespace is reported as missing.

    Returns:
        An ``OpenAI`` client constructed with the cleaned API key.

    Raises:
        ValueError: If OPENAI_API_KEY is unset, empty, or only whitespace.
    """
    api_key = (os.getenv("OPENAI_API_KEY") or "").strip()
    if not api_key:
        raise ValueError(
            "OPENAI_API_KEY is missing. "
            "Please add it in Hugging Face Spaces → Settings → Secrets."
        )
    return OpenAI(api_key=api_key)
def extract_output_text(response):
    """
    Pull the generated text out of a Responses API result object.

    If ``response.output_text`` exists and is non-empty it is returned as-is.
    Otherwise every non-empty ``text`` attribute found under
    ``response.output[*].content[*]`` is gathered, joined with newlines and
    stripped. An empty string is returned when nothing is found.
    """
    direct_text = getattr(response, "output_text", None)
    if direct_text:
        return direct_text

    # Fallback: walk the structured output and collect each text fragment.
    pieces = [
        part.text
        for item in (getattr(response, "output", None) or [])
        for part in (getattr(item, "content", None) or [])
        if getattr(part, "text", None)
    ]
    return "\n".join(pieces).strip()
def run_generation_model(
    prompt,
    model,
    system_message,
    temperature,
    top_p,
    max_output_tokens,
    show_settings,
):
    """
    Run a prompt against a standard (non-reasoning) generation model.

    Intended for writing, summarization, rewriting, marketing copy,
    explanations, and standard chatbot-style tasks.

    Only parameters considered safe for this tab are sent:
    - frequency_penalty and presence_penalty are not passed.
    - reasoning.effort is not passed.

    Args:
        prompt: User prompt, sent as ``input``.
        model: Model name, sent as ``model``.
        system_message: Sent as ``instructions``.
        temperature: Sampling temperature; converted with ``float``.
        top_p: Nucleus sampling value; converted with ``float``.
        max_output_tokens: Output token limit; converted with ``int``.
        show_settings: When truthy, a settings header is prepended to the text.

    Returns:
        The generated text (optionally preceded by the settings header),
        ``"No output generated."`` when the response holds no text, or a
        string starting with ``"Error:\\n"`` when the prompt is blank or any
        exception is raised while building or sending the request.
    """
    # Reject blank prompts before creating a client or calling the API.
    if prompt is None or not str(prompt).strip():
        return "Error:\nPrompt is empty. Please enter a prompt and try again."

    try:
        client = get_openai_client()
        request_params = {
            "model": model,
            "instructions": system_message,
            "input": prompt,
            "temperature": float(temperature),
            "top_p": float(top_p),
            "max_output_tokens": int(max_output_tokens),
        }
        response = client.responses.create(**request_params)
        output = extract_output_text(response) or "No output generated."

        if not show_settings:
            return output

        settings = f"""GENERATION SETTINGS
-------------------
Model: {model}
Temperature: {temperature}
Top P: {top_p}
Max Output Tokens: {max_output_tokens}
Note:
Frequency penalty and presence penalty are intentionally not sent in this app
to avoid unsupported-parameter errors.
OUTPUT
------
"""
        return settings + output
    except Exception as e:
        # UI boundary: surface the failure as text in the output box.
        return f"Error:\n{str(e)}"
def get_safe_reasoning_effort(model, selected_effort):
    """
    Map the requested reasoning effort to one this app will send for `model`.

    Rules applied here:
    - "gpt-5-pro" always gets "high".
    - "gpt-5.1" may use "none", "low", "medium" or "high".
    - Every other model may use "low", "medium" or "high".

    A requested value outside the permitted set is replaced by "medium".
    """
    if model == "gpt-5-pro":
        return "high"

    permitted = (
        ("none", "low", "medium", "high")
        if model == "gpt-5.1"
        else ("low", "medium", "high")
    )
    if selected_effort in permitted:
        return selected_effort
    return "medium"
def run_reasoning_model(
    prompt,
    model,
    reasoning_effort,
    max_output_tokens,
    show_settings,
):
    """
    Run a prompt against a reasoning model.

    Intended for complex analysis, coding, multi-step reasoning,
    architecture decisions, trade-off analysis, and agent planning.

    Request shape:
    - reasoning.effort is passed, after being normalized by
      ``get_safe_reasoning_effort`` for the chosen model.
    - temperature and top_p are not passed.
    - frequency_penalty and presence_penalty are not passed.

    Args:
        prompt: User prompt, sent as ``input``.
        model: Model name, sent as ``model``.
        reasoning_effort: Effort level selected in the UI.
        max_output_tokens: Output token limit; converted with ``int``.
        show_settings: When truthy, a settings header is prepended to the text.

    Returns:
        The model's text (optionally preceded by the settings header), a hint
        to raise Max Output Tokens when the response holds no visible text,
        or a string starting with ``"Error:\\n"`` when the prompt is blank or
        any exception is raised while building or sending the request.
    """
    # Reject blank prompts before creating a client or calling the API.
    if prompt is None or not str(prompt).strip():
        return "Error:\nPrompt is empty. Please enter a prompt and try again."

    try:
        client = get_openai_client()
        safe_effort = get_safe_reasoning_effort(model, reasoning_effort)
        request_params = {
            "model": model,
            "input": prompt,
            "reasoning": {
                "effort": safe_effort
            },
            "max_output_tokens": int(max_output_tokens),
        }
        response = client.responses.create(**request_params)
        output = extract_output_text(response)
        if not output:
            output = (
                "No visible output generated. "
                "Try increasing Max Output Tokens because reasoning models use "
                "some tokens internally before producing the final answer."
            )

        if not show_settings:
            return output

        # Both the selected and the effective effort are shown so that a
        # silent substitution is visible to the user.
        settings = f"""REASONING SETTINGS
------------------
Model: {model}
Selected Reasoning Effort: {reasoning_effort}
Used Reasoning Effort: {safe_effort}
Max Output Tokens: {max_output_tokens}
Note:
Temperature, top_p, frequency penalty, and presence penalty are intentionally
not sent for reasoning models to avoid unsupported-parameter errors.
OUTPUT
------
"""
        return settings + output
    except Exception as e:
        # UI boundary: surface the failure as text in the output box.
        return f"Error:\n{str(e)}"
# Custom stylesheet passed to demo.launch(css=CSS).
# - .gradio-container: caps the page width and centers it.
# - .main-title / .helper-box: classes used by the <div> wrappers in the
#   Markdown blocks at the top of the page.
# - .output-box: class attached to both output Textboxes (monospace text).
CSS = """
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
}
.main-title {
text-align: center;
margin-bottom: 20px;
}
.helper-box {
padding: 14px;
border-radius: 12px;
background: #f7f7f8;
border: 1px solid #e5e7eb;
margin-bottom: 16px;
}
.output-box textarea {
font-family: monospace !important;
}
"""
# Build the two-tab interface: one tab for generation models and one for
# reasoning models. Each tab wires its own button to its own handler.
with gr.Blocks() as demo:
    # The title uses explicit <h1>/<p> tags. Markdown syntax placed directly
    # inside a <div> (no blank line after the opening tag) is treated as raw
    # HTML content, so a "# Heading" line there would be shown literally.
    gr.Markdown(
        """
<div class="main-title">
<h1>LLM Model Controls Demo</h1>
<p>Part of Decoding Data Science AI Residency A clean Gradio app for testing generation models and reasoning models separately.</p>
</div>
"""
    )
    gr.Markdown(
        """
<div class="helper-box">
<b>Setup:</b> Prompting is not Enough
</div>
"""
    )

    # ------------------------------------------------------------------
    # Tab 1: generation models (temperature / top_p controls)
    # ------------------------------------------------------------------
    with gr.Tab("Generation Models"):
        gr.Markdown(
            """
Use this tab for normal text generation tasks like LinkedIn posts, summaries, explanations, rewriting, and simple Q&A.
"""
        )
        with gr.Row():
            with gr.Column(scale=1):
                gen_prompt = gr.Textbox(
                    lines=7,
                    label="Prompt",
                    value="Write a short LinkedIn post explaining why business leaders should learn AI. Maximum 120 words.",
                )
                gen_model = gr.Dropdown(
                    choices=GENERATION_MODELS,
                    label="Generation Model",
                    # Fall back to a listed model if the env override is unknown.
                    value=(
                        DEFAULT_GENERATION_MODEL
                        if DEFAULT_GENERATION_MODEL in GENERATION_MODELS
                        else "gpt-4.1"
                    ),
                )
                gen_system_message = gr.Textbox(
                    lines=3,
                    label="System Message",
                    value="You are a helpful AI instructor. Keep answers clear and practical.",
                )
                gen_temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    step=0.01,
                    value=0.7,
                    label="Temperature",
                )
                gen_top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=1.0,
                    label="Top P",
                )
                gen_max_output_tokens = gr.Slider(
                    minimum=50,
                    maximum=4000,
                    step=50,
                    value=500,
                    label="Max Output Tokens",
                )
                gen_show_settings = gr.Checkbox(
                    value=True,
                    label="Show Settings",
                )
                gen_button = gr.Button("Generate", variant="primary")
            with gr.Column(scale=1):
                gen_output = gr.Textbox(
                    lines=22,
                    label="Output",
                    elem_classes=["output-box"],
                )
        # Input order must match run_generation_model's parameter order.
        gen_button.click(
            fn=run_generation_model,
            inputs=[
                gen_prompt,
                gen_model,
                gen_system_message,
                gen_temperature,
                gen_top_p,
                gen_max_output_tokens,
                gen_show_settings,
            ],
            outputs=gen_output,
        )

    # ------------------------------------------------------------------
    # Tab 2: reasoning models (reasoning effort control)
    # ------------------------------------------------------------------
    with gr.Tab("Reasoning Models"):
        gr.Markdown(
            """
Use this tab for complex tasks like architecture decisions, agent planning, debugging, code reasoning, and trade-off analysis.
"""
        )
        with gr.Row():
            with gr.Column(scale=1):
                reason_prompt = gr.Textbox(
                    lines=9,
                    label="Prompt",
                    value="""A telecom company wants to build an AI customer support assistant.
They have:
- 50,000 past support tickets
- A FAQ website
- Billing policies
- A small developer team
Should they start with:
1. Simple prompt-based chatbot
2. RAG chatbot
3. Fine-tuning
4. Agent with tools
Give a practical recommendation with trade-offs.""",
                )
                reason_model = gr.Dropdown(
                    choices=REASONING_MODELS,
                    label="Reasoning Model",
                    # Fall back to a listed model if the env override is unknown.
                    value=(
                        DEFAULT_REASONING_MODEL
                        if DEFAULT_REASONING_MODEL in REASONING_MODELS
                        else "gpt-5.5"
                    ),
                )
                # The handler normalizes this choice per model through
                # get_safe_reasoning_effort before sending the request.
                reason_effort = gr.Radio(
                    choices=["none", "low", "medium", "high"],
                    label="Reasoning Effort",
                    value="medium",
                )
                reason_max_output_tokens = gr.Slider(
                    minimum=100,
                    maximum=12000,
                    step=100,
                    value=2000,
                    label="Max Output Tokens",
                )
                reason_show_settings = gr.Checkbox(
                    value=True,
                    label="Show Settings",
                )
                reason_button = gr.Button("Reason", variant="primary")
            with gr.Column(scale=1):
                reason_output = gr.Textbox(
                    lines=22,
                    label="Output",
                    elem_classes=["output-box"],
                )
        # Input order must match run_reasoning_model's parameter order.
        reason_button.click(
            fn=run_reasoning_model,
            inputs=[
                reason_prompt,
                reason_model,
                reason_effort,
                reason_max_output_tokens,
                reason_show_settings,
            ],
            outputs=reason_output,
        )
if __name__ == "__main__":
    # Collect the launch settings first, then start the server.
    # The port comes from the PORT environment variable (default 7860) and
    # the server binds to all interfaces.
    launch_options = {
        "theme": gr.themes.Soft(),
        "css": CSS,
        "server_name": "0.0.0.0",
        "server_port": int(os.getenv("PORT", 7860)),
        "debug": False,
        "share": False,
    }
    demo.launch(**launch_options)