| import os |
| import gradio as gr |
| from openai import OpenAI |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
# Model names offered as choices in the "Generation Models" tab dropdown.
GENERATION_MODELS = ["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "gpt-4o", "gpt-4o-mini"]

# Model names offered as choices in the "Reasoning Models" tab dropdown.
REASONING_MODELS = ["gpt-5.5", "gpt-5.1", "gpt-5-mini", "gpt-5-pro", "o3", "o4-mini"]

# Preferred initial dropdown selections; each one can be overridden through
# the environment variable named in the lookup.
DEFAULT_GENERATION_MODEL = os.environ.get("OPENAI_GENERATION_MODEL", "gpt-4.1")
DEFAULT_REASONING_MODEL = os.environ.get("OPENAI_REASONING_MODEL", "gpt-5.5")
|
|
|
|
def get_openai_client():
    """Build an OpenAI client from the ``OPENAI_API_KEY`` environment variable.

    Leading and trailing whitespace is stripped from the key before use, so a
    value that is only whitespace is reported as missing rather than being
    handed to the client.

    Returns:
        An ``OpenAI`` client constructed with the cleaned key.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is unset, empty, or only whitespace.
    """
    # `or ""` covers the unset case so .strip() is always called on a string.
    api_key = (os.getenv("OPENAI_API_KEY") or "").strip()

    if not api_key:
        raise ValueError(
            "OPENAI_API_KEY is missing. "
            "Please add it in Hugging Face Spaces → Settings → Secrets."
        )

    return OpenAI(api_key=api_key)
|
|
|
|
def extract_output_text(response):
    """
    Pull the visible text out of a Responses API result object.

    A truthy ``response.output_text`` is returned as-is. Otherwise every
    truthy ``text`` attribute found under ``response.output[*].content[*]``
    is collected, joined with newlines, and stripped. Missing or empty
    attributes at any level are skipped, so the result may be an empty string.
    """
    direct_text = getattr(response, "output_text", None)
    if direct_text:
        return direct_text

    pieces = [
        part.text
        for item in (getattr(response, "output", None) or [])
        for part in (getattr(item, "content", None) or [])
        if getattr(part, "text", None)
    ]
    return "\n".join(pieces).strip()
|
|
|
|
def run_generation_model(
    prompt,
    model,
    system_message,
    temperature,
    top_p,
    max_output_tokens,
    show_settings,
):
    """
    Run a prompt through a normal generation model and return display text.

    These models are used for writing, summarization, rewriting,
    marketing copy, explanations, and standard chatbot-style tasks.

    Important:
    - We only pass parameters that are safe for this tab.
    - We do not pass frequency_penalty or presence_penalty.
    - We do not pass reasoning.effort here.

    Args:
        prompt: User text sent as the request ``input``.
        model: Model name sent as ``model``.
        system_message: Text sent as ``instructions``.
        temperature: Sampling temperature; converted with ``float``.
        top_p: Nucleus sampling value; converted with ``float``.
        max_output_tokens: Output token cap; converted with ``int``.
        show_settings: When truthy, prefix the output with a settings header.

    Returns:
        The model output (or "No output generated." when it is empty),
        optionally preceded by the settings header. A blank prompt or any
        exception yields a string starting with "Error:\\n" instead.
    """
    # Reject a missing or whitespace-only prompt before creating a client, so
    # no request is made for input that contains nothing to answer.
    if prompt is None or (isinstance(prompt, str) and not prompt.strip()):
        return "Error:\nPrompt is empty. Please enter a prompt and try again."

    try:
        client = get_openai_client()

        request_params = {
            "model": model,
            "instructions": system_message,
            "input": prompt,
            "temperature": float(temperature),
            "top_p": float(top_p),
            "max_output_tokens": int(max_output_tokens),
        }

        response = client.responses.create(**request_params)
        output = extract_output_text(response) or "No output generated."

        if show_settings:
            settings = f"""GENERATION SETTINGS
-------------------
Model: {model}
Temperature: {temperature}
Top P: {top_p}
Max Output Tokens: {max_output_tokens}

Note:
Frequency penalty and presence penalty are intentionally not sent in this app
to avoid unsupported-parameter errors.

OUTPUT
------
"""
            return settings + output

        return output

    except Exception as e:
        # UI boundary: report any failure as text in the output box.
        return f"Error:\n{str(e)}"
|
|
|
|
def get_safe_reasoning_effort(model, selected_effort):
    """
    Map the requested reasoning effort to one this function allows for `model`.

    Rules applied:
    - "gpt-5-pro" always gets "high", whatever was selected.
    - "gpt-5.1" accepts "none", "low", "medium", "high".
    - Every other model accepts "low", "medium", "high".
    - A selection outside the accepted values falls back to "medium".
    """
    if model == "gpt-5-pro":
        return "high"

    # Only gpt-5.1 additionally accepts "none".
    if model == "gpt-5.1":
        permitted = ("none", "low", "medium", "high")
    else:
        permitted = ("low", "medium", "high")

    if selected_effort in permitted:
        return selected_effort
    return "medium"
|
|
|
|
def run_reasoning_model(
    prompt,
    model,
    reasoning_effort,
    max_output_tokens,
    show_settings,
):
    """
    Run a prompt through a reasoning model and return display text.

    These models are used for:
    - Complex analysis
    - Coding
    - Multi-step reasoning
    - Architecture decisions
    - Trade-off analysis
    - Agent planning

    Important:
    - We pass reasoning.effort here.
    - We do not pass temperature/top_p here.
    - We do not pass frequency_penalty or presence_penalty.

    Args:
        prompt: User text sent as the request ``input``.
        model: Model name sent as ``model``.
        reasoning_effort: Requested effort; adjusted per model by
            ``get_safe_reasoning_effort`` before it is sent.
        max_output_tokens: Output token cap; converted with ``int``.
        show_settings: When truthy, prefix the output with a settings header
            that lists both the selected and the actually used effort.

    Returns:
        The model output (or a hint to raise Max Output Tokens when it is
        empty), optionally preceded by the settings header. A blank prompt or
        any exception yields a string starting with "Error:\\n" instead.
    """
    # Reject a missing or whitespace-only prompt before creating a client, so
    # no request is made for input that contains nothing to answer.
    if prompt is None or (isinstance(prompt, str) and not prompt.strip()):
        return "Error:\nPrompt is empty. Please enter a prompt and try again."

    try:
        client = get_openai_client()

        safe_effort = get_safe_reasoning_effort(model, reasoning_effort)

        request_params = {
            "model": model,
            "input": prompt,
            "reasoning": {
                "effort": safe_effort
            },
            "max_output_tokens": int(max_output_tokens),
        }

        response = client.responses.create(**request_params)
        output = extract_output_text(response)

        if not output:
            output = (
                "No visible output generated. "
                "Try increasing Max Output Tokens because reasoning models use "
                "some tokens internally before producing the final answer."
            )

        if show_settings:
            settings = f"""REASONING SETTINGS
------------------
Model: {model}
Selected Reasoning Effort: {reasoning_effort}
Used Reasoning Effort: {safe_effort}
Max Output Tokens: {max_output_tokens}

Note:
Temperature, top_p, frequency penalty, and presence penalty are intentionally
not sent for reasoning models to avoid unsupported-parameter errors.

OUTPUT
------
"""
            return settings + output

        return output

    except Exception as e:
        # UI boundary: report any failure as text in the output box.
        return f"Error:\n{str(e)}"
|
|
|
|
# Custom stylesheet for the app. The class names defined here are the ones
# used by the page markup ("main-title", "helper-box") and by the output
# textboxes (elem_classes=["output-box"]).
CSS: str = """
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
}

.main-title {
text-align: center;
margin-bottom: 20px;
}

.helper-box {
padding: 14px;
border-radius: 12px;
background: #f7f7f8;
border: 1px solid #e5e7eb;
margin-bottom: 16px;
}

.output-box textarea {
font-family: monospace !important;
}
"""
|
|
|
|
with gr.Blocks() as demo:
    # ---- Page header and setup note ----
    gr.Markdown(
        """
<div class="main-title">

# LLM Model Controls Demo

Part of Decoding Data Science AI Residency A clean Gradio app for testing generation models and reasoning models separately.

</div>
"""
    )

    gr.Markdown(
        """
<div class="helper-box">

<b>Setup:</b> Prompting is not Enough



</div>
"""
    )

    # ---- Tab 1: generation models (temperature / top_p controls) ----
    with gr.Tab("Generation Models"):
        gr.Markdown(
            """
Use this tab for normal text generation tasks like LinkedIn posts, summaries, explanations, rewriting, and simple Q&A.
"""
        )

        # Use the configured default only if it is one of the offered choices.
        gen_initial_model = (
            DEFAULT_GENERATION_MODEL
            if DEFAULT_GENERATION_MODEL in GENERATION_MODELS
            else "gpt-4.1"
        )

        with gr.Row():
            # Left column: inputs and controls.
            with gr.Column(scale=1):
                gen_prompt = gr.Textbox(
                    label="Prompt",
                    lines=7,
                    value="Write a short LinkedIn post explaining why business leaders should learn AI. Maximum 120 words.",
                )

                gen_model = gr.Dropdown(
                    label="Generation Model",
                    choices=GENERATION_MODELS,
                    value=gen_initial_model,
                )

                gen_system_message = gr.Textbox(
                    label="System Message",
                    lines=3,
                    value="You are a helpful AI instructor. Keep answers clear and practical.",
                )

                gen_temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.0,
                    maximum=2.0,
                    step=0.01,
                    value=0.7,
                )

                gen_top_p = gr.Slider(
                    label="Top P",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=1.0,
                )

                gen_max_output_tokens = gr.Slider(
                    label="Max Output Tokens",
                    minimum=50,
                    maximum=4000,
                    step=50,
                    value=500,
                )

                gen_show_settings = gr.Checkbox(
                    label="Show Settings",
                    value=True,
                )

                gen_button = gr.Button("Generate", variant="primary")

            # Right column: result text.
            with gr.Column(scale=1):
                gen_output = gr.Textbox(
                    label="Output",
                    lines=22,
                    elem_classes=["output-box"],
                )

        # Order matches the positional parameters of run_generation_model.
        gen_inputs = [
            gen_prompt,
            gen_model,
            gen_system_message,
            gen_temperature,
            gen_top_p,
            gen_max_output_tokens,
            gen_show_settings,
        ]
        gen_button.click(
            fn=run_generation_model,
            inputs=gen_inputs,
            outputs=gen_output,
        )

    # ---- Tab 2: reasoning models (reasoning effort control) ----
    with gr.Tab("Reasoning Models"):
        gr.Markdown(
            """
Use this tab for complex tasks like architecture decisions, agent planning, debugging, code reasoning, and trade-off analysis.
"""
        )

        # Use the configured default only if it is one of the offered choices.
        reason_initial_model = (
            DEFAULT_REASONING_MODEL
            if DEFAULT_REASONING_MODEL in REASONING_MODELS
            else "gpt-5.5"
        )

        # Example prompt pre-filled in the reasoning tab.
        reason_default_prompt = """A telecom company wants to build an AI customer support assistant.

They have:
- 50,000 past support tickets
- A FAQ website
- Billing policies
- A small developer team

Should they start with:
1. Simple prompt-based chatbot
2. RAG chatbot
3. Fine-tuning
4. Agent with tools

Give a practical recommendation with trade-offs."""

        with gr.Row():
            # Left column: inputs and controls.
            with gr.Column(scale=1):
                reason_prompt = gr.Textbox(
                    label="Prompt",
                    lines=9,
                    value=reason_default_prompt,
                )

                reason_model = gr.Dropdown(
                    label="Reasoning Model",
                    choices=REASONING_MODELS,
                    value=reason_initial_model,
                )

                reason_effort = gr.Radio(
                    label="Reasoning Effort",
                    choices=["none", "low", "medium", "high"],
                    value="medium",
                )

                reason_max_output_tokens = gr.Slider(
                    label="Max Output Tokens",
                    minimum=100,
                    maximum=12000,
                    step=100,
                    value=2000,
                )

                reason_show_settings = gr.Checkbox(
                    label="Show Settings",
                    value=True,
                )

                reason_button = gr.Button("Reason", variant="primary")

            # Right column: result text.
            with gr.Column(scale=1):
                reason_output = gr.Textbox(
                    label="Output",
                    lines=22,
                    elem_classes=["output-box"],
                )

        # Order matches the positional parameters of run_reasoning_model.
        reason_inputs = [
            reason_prompt,
            reason_model,
            reason_effort,
            reason_max_output_tokens,
            reason_show_settings,
        ]
        reason_button.click(
            fn=run_reasoning_model,
            inputs=reason_inputs,
            outputs=reason_output,
        )
|
|
|
|
if __name__ == "__main__":
    # Listen on all interfaces, on the port given by $PORT (7860 if unset).
    listen_port = int(os.getenv("PORT", 7860))

    demo.launch(
        server_name="0.0.0.0",
        server_port=listen_port,
        theme=gr.themes.Soft(),
        css=CSS,
        share=False,
        debug=False,
    )