import json
import textwrap
from typing import Dict, Any, List, Tuple, Optional

import gradio as gr
import requests
import matplotlib.pyplot as plt
from matplotlib.figure import Figure


# ============================================================
# LLM CALLER (OPENAI-COMPATIBLE, GPT-4.1 BY DEFAULT)
# ============================================================

def call_chat_completion(
    api_key: str,
    base_url: str,
    model: str,
    system_prompt: str,
    user_prompt: str,
    max_completion_tokens: int = 1800,
) -> str:
    """
    OpenAI-compatible /v1/chat/completions helper.

    - Uses new-style `max_completion_tokens` (for GPT-4.1, GPT-4o, etc.)
    - Falls back to legacy `max_tokens` if needed.
    - Does NOT send temperature/top_p so it's safe with strict models.
    """
    if not api_key:
        raise ValueError("LLM API key is required.")
    if not base_url:
        base_url = "https://api.openai.com"

    url = base_url.rstrip("/") + "/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_completion_tokens": max_completion_tokens,
    }

    resp = requests.post(url, headers=headers, json=payload, timeout=60)

    # Fallback for providers that still expect `max_tokens`
    if resp.status_code == 400 and "max_completion_tokens" in resp.text:
        payload.pop("max_completion_tokens", None)
        payload["max_tokens"] = max_completion_tokens
        resp = requests.post(url, headers=headers, json=payload, timeout=60)

    if resp.status_code != 200:
        raise RuntimeError(
            f"LLM API error {resp.status_code}: {resp.text[:500]}"
        )

    data = resp.json()
    try:
        return data["choices"][0]["message"]["content"]
    except Exception as e:
        raise RuntimeError(
            f"Unexpected LLM response format: {e}\n\n{json.dumps(data, indent=2)}"
        )
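
# Illustrative usage sketch (not executed by the app). The key, base URL, and
# prompts below are placeholders, not values used elsewhere in this file:
#
#   reply = call_chat_completion(
#       api_key="sk-...",
#       base_url="https://api.openai.com",
#       model="gpt-4.1",
#       system_prompt="You are a helpful assistant.",
#       user_prompt="Summarize the benefits of structured executive briefs.",
#   )
#   print(reply)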

# ============================================================
# FIRECRAWL SCRAPER (OPTIONAL)
# ============================================================

def call_firecrawl_scrape(
    firecrawl_key: str,
    url: str,
    formats: Optional[List[str]] = None,
) -> str:
    """
    Calls Firecrawl's /v0/scrape endpoint to get cleaned markdown/HTML
    for a single URL.

    Docs: https://docs.firecrawl.dev/api-reference/endpoint/scrape
    """
    if not firecrawl_key:
        raise ValueError("Firecrawl API key is missing.")
    if not url:
        raise ValueError("URL is required to use Firecrawl.")

    api_url = "https://api.firecrawl.dev/v0/scrape"
    headers = {
        "Authorization": f"Bearer {firecrawl_key}",
        "Content-Type": "application/json",
    }
    payload: Dict[str, Any] = {"url": url}
    if formats:
        payload["formats"] = formats

    resp = requests.post(api_url, headers=headers, json=payload, timeout=60)
    if resp.status_code != 200:
        raise RuntimeError(
            f"Firecrawl error {resp.status_code}: {resp.text[:400]}"
        )

    data = resp.json()

    # Default: try markdown first, fall back to raw HTML if the structure differs.
    # Common shape: {"data": {"markdown": "..."}}
    if isinstance(data, dict):
        # Nested under "data"
        inner = data.get("data", {})
        if isinstance(inner, dict):
            if "markdown" in inner and isinstance(inner["markdown"], str):
                return inner["markdown"]
            if "html" in inner and isinstance(inner["html"], str):
                return inner["html"]

    # If the service changes shape, last fallback: stringify the whole response
    return json.dumps(data)
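
# Illustrative usage sketch (assumes a valid Firecrawl key; the key and URL
# below are placeholders):
#
#   markdown_text = call_firecrawl_scrape(
#       firecrawl_key="fc-...",
#       url="https://example.com/",
#       formats=["markdown"],
#   )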

# ============================================================
# ANALYSIS PROMPT + PARSING
# ============================================================

ANALYSIS_SYSTEM_PROMPT = """
You are an expert strategy analyst.
Given some web content (or pasted text) plus a short user description,
you will produce a concise, executive-ready analysis in JSON.

Return ONLY JSON using this schema:
{
  "executive_summary": "string",
  "key_points": ["string", ...],
  "opportunities": ["string", ...],
  "risks": ["string", ...],
  "recommended_actions": [
    {
      "title": "string",
      "area": "string",
      "description": "string"
    }
  ]
}
"""


def build_analysis_user_prompt(
    url: str,
    content_preview: str,
    user_notes: str,
    focus: str,
) -> str:
    truncated = content_preview[:6000]  # keep context reasonable
    return f"""
Source URL: {url or "N/A"}
Focus area: {focus}

User notes / context:
{user_notes or "N/A"}

Scraped or pasted content (truncated if long):
\"\"\"{truncated}\"\"\"
""".strip()


def parse_analysis_json(raw_text: str) -> Dict[str, Any]:
    """Strip fences and extract JSON payload."""
    txt = raw_text.strip()
    if txt.startswith("```"):
        parts = txt.split("```")
        txt = next((p for p in parts if "{" in p and "}" in p), parts[-1])
    first = txt.find("{")
    last = txt.rfind("}")
    if first == -1 or last == -1:
        raise ValueError("No JSON detected in model output.")
    return json.loads(txt[first:last + 1])
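
# Illustrative behavior: parse_analysis_json() accepts raw model output with or
# without Markdown code fences, e.g.
#
#   parse_analysis_json('```json\n{"executive_summary": "ok", "key_points": []}\n```')
#   -> {"executive_summary": "ok", "key_points": []}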

def analysis_to_markdown(analysis: Dict[str, Any]) -> str:
    """Render the JSON analysis as a short executive brief in Markdown."""

    def bullet(items: List[str]) -> str:
        if not items:
            return "_None identified._"
        return "\n".join(f"- {i}" for i in items)

    md: List[str] = []
    md.append("## Executive Summary")
    md.append(analysis.get("executive_summary", "N/A"))

    md.append("\n## Key Points")
    md.append(bullet(analysis.get("key_points", [])))

    md.append("\n## Opportunities")
    md.append(bullet(analysis.get("opportunities", [])))

    md.append("\n## Risks")
    md.append(bullet(analysis.get("risks", [])))

    md.append("\n## Recommended Actions")
    actions = analysis.get("recommended_actions", [])
    if not actions:
        md.append("_None suggested yet – refine your prompt or focus._")
    else:
        for idx, act in enumerate(actions, start=1):
            title = act.get("title", f"Action {idx}")
            area = act.get("area", "General")
            desc = act.get("description", "")
            md.append(f"### {idx}. {title}")
            md.append(f"**Area:** {area}")
            md.append(desc or "_No description provided._")

    return "\n\n".join(md)

# ============================================================
# SIMPLE DATA VISUAL – COUNTS BY CATEGORY
# ============================================================

def analysis_to_figure(analysis: Dict[str, Any]) -> Figure:
    """
    Basic bar chart: how many items per category
    (points, opportunities, risks, actions).
    Visualizes "density" of insights.
    """
    labels = ["Key Points", "Opportunities", "Risks", "Actions"]
    values = [
        len(analysis.get("key_points", []) or []),
        len(analysis.get("opportunities", []) or []),
        len(analysis.get("risks", []) or []),
        len(analysis.get("recommended_actions", []) or []),
    ]
    fig, ax = plt.subplots(figsize=(5, 3))
    ax.bar(labels, values)
    ax.set_ylabel("Count")
    ax.set_title("Insight Density by Category")
    fig.tight_layout()
    return fig
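
# Illustrative behavior: an analysis with 3 key points, 2 opportunities, 1 risk,
# and 2 recommended actions produces bars of heights 3, 2, 1, and 2.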

# ============================================================
# SAMPLE PRESETS
# ============================================================

SAMPLE_CONFIGS: Dict[str, Dict[str, str]] = {
    "AI / Tech Policy Article": {
        "url": "https://www.whitehouse.gov/briefing-room/",
        "notes": "Focus on AI policy, workforce impact, and org-readiness.",
        "focus": "Policy / Regulation",
    },
    "Competitor Product Page": {
        "url": "https://example.com/",
        "notes": "Assume this is a competitor's SaaS pricing page.",
        "focus": "Product / Market",
    },
    "Industry Research Report": {
        "url": "https://example.org/report",
        "notes": "Treat as a long-form industry trend report.",
        "focus": "Industry / Strategy",
    },
}


def load_sample(name: str) -> Tuple[str, str, str]:
    if not name or name not in SAMPLE_CONFIGS:
        # Fall back to the default focus so the Dropdown keeps a valid choice.
        return "", "", "Industry / Strategy"
    cfg = SAMPLE_CONFIGS[name]
    return cfg["url"], cfg["notes"], cfg["focus"]

# ============================================================
# MAIN HANDLER FOR GRADIO
# ============================================================

def generate_brief_ui(
    llm_key_state: str,
    llm_key_input: str,
    base_url: str,
    model_name: str,
    firecrawl_key: str,
    url: str,
    pasted_text: str,
    user_notes: str,
    focus: str,
):
    """
    Master UI handler:
    - decides whether to call Firecrawl (if key + URL are provided)
    - merges scraped content with pasted text
    - calls the LLM and renders the outputs

    Returns a 4-tuple: (brief markdown, raw JSON string, figure, updated key state).
    """
    empty_analysis = {
        "key_points": [],
        "opportunities": [],
        "risks": [],
        "recommended_actions": [],
    }

    llm_key = llm_key_input or llm_key_state
    if not llm_key:
        return (
            "⚠️ Please enter your LLM API key in the left panel.",
            "",
            analysis_to_figure(empty_analysis),
            llm_key_state,
        )
    if not url and not pasted_text:
        return (
            "⚠️ Provide at least a URL or some pasted text.",
            "",
            analysis_to_figure(empty_analysis),
            llm_key_state,
        )

    # 1. Scrape via Firecrawl if URL + key are set
    scraped_content = ""
    if url and firecrawl_key:
        try:
            scraped_content = call_firecrawl_scrape(firecrawl_key, url, formats=["markdown"])
        except Exception as e:
            scraped_content = f"(Firecrawl error: {e})"

    # 2. Compose content preview (scraped + pasted)
    content_preview_parts = []
    if scraped_content:
        content_preview_parts.append(scraped_content)
    if pasted_text:
        content_preview_parts.append("\n\nUser-pasted text:\n" + pasted_text)
    content_preview = "\n\n".join(content_preview_parts)

    # 3. Build the prompt and call the LLM
    user_prompt = build_analysis_user_prompt(url, content_preview, user_notes, focus)
    model = model_name or "gpt-4.1"
    try:
        raw = call_chat_completion(
            api_key=llm_key,
            base_url=base_url,
            model=model,
            system_prompt=ANALYSIS_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_completion_tokens=1800,
        )
        analysis = parse_analysis_json(raw)
        md = analysis_to_markdown(analysis)
        fig = analysis_to_figure(analysis)
        json_out = json.dumps(analysis, indent=2, ensure_ascii=False)
        return md, json_out, fig, llm_key
    except Exception as e:
        empty_fig = analysis_to_figure(empty_analysis)
        return f"❌ Error generating brief:\n\n{e}", "", empty_fig, llm_key_state

# ============================================================
# GRADIO UI
# ============================================================

with gr.Blocks(title="ZEN Web Insight Brief Builder") as demo:
    gr.Markdown(
        """
# ZEN Web Insight Brief Builder

Turn any URL (plus optional Firecrawl scrape) into a structured,
actionable executive brief:

1. **Configure API keys** (LLM + optional Firecrawl)
2. **Paste a URL and/or text**
3. **Get an executive summary, risks, opportunities, and actions**
"""
    )

    llm_key_state = gr.State("")

    with gr.Row():
        # LEFT: API keys + samples
        with gr.Column(scale=1):
            gr.Markdown("### 1 – API & Model Settings")
            llm_key_input = gr.Textbox(
                label="LLM API Key",
                placeholder="OpenAI or compatible key",
                type="password",
            )
            base_url = gr.Textbox(
                label="LLM Base URL",
                value="https://api.openai.com",
                placeholder="e.g. https://api.openai.com",
            )
            model_name = gr.Textbox(
                label="Model Name",
                value="gpt-4.1",
                placeholder="e.g. gpt-4.1, gpt-4o, etc.",
            )
            gr.Markdown("#### Optional – Firecrawl (URL Scraper)")
            firecrawl_key = gr.Textbox(
                label="Firecrawl API Key (optional)",
                placeholder="Only needed if you want automatic URL scraping",
                type="password",
            )
            gr.Markdown("#### Sample Config")
            sample_dropdown = gr.Dropdown(
                label="Load a sample scenario",
                choices=list(SAMPLE_CONFIGS.keys()),
                value=None,
            )
            load_sample_btn = gr.Button("Load Sample")

        # RIGHT: content + focus
        with gr.Column(scale=2):
            gr.Markdown("### 2 – Content & Focus")
            url_input = gr.Textbox(
                label="Source URL",
                placeholder="Paste a URL to analyze (works best with a Firecrawl key, but optional)",
            )
            pasted_text = gr.Textbox(
                label="Or paste content manually",
                placeholder="Paste article text, notes, or report sections here.",
                lines=8,
            )
            user_notes = gr.Textbox(
                label="Your context / what you care about",
                placeholder="Example: Focus on youth workforce impacts and funding opportunities.",
                lines=3,
            )
            focus = gr.Dropdown(
                label="Focus lens",
                choices=[
                    "Policy / Regulation",
                    "Product / Market",
                    "Industry / Strategy",
                    "Risk & Compliance",
                    "Custom / Other",
                ],
                value="Industry / Strategy",
            )
            generate_btn = gr.Button("Generate Insight Brief", variant="primary")

    gr.Markdown("### 3 – Executive Brief")
    with gr.Row():
        with gr.Column(scale=3):
            brief_md = gr.Markdown(
                label="Brief",
                value="Your executive brief will appear here after generation.",
            )
        with gr.Column(scale=2):
            brief_json = gr.Code(
                label="Raw JSON (for automation / export)",
                language="json",
            )

    gr.Markdown("### 4 – Insight Density Visual")
    brief_fig = gr.Plot(label="Insight Density by Category")

    # Wiring
    load_sample_btn.click(
        load_sample,
        inputs=[sample_dropdown],
        outputs=[url_input, user_notes, focus],
    )
    generate_btn.click(
        generate_brief_ui,
        inputs=[
            llm_key_state,
            llm_key_input,
            base_url,
            model_name,
            firecrawl_key,
            url_input,
            pasted_text,
            user_notes,
            focus,
        ],
        outputs=[brief_md, brief_json, brief_fig, llm_key_state],
    )


if __name__ == "__main__":
    demo.launch()