Interview_template

Sleeping

App Files Files Community

asrarbw committed on Feb 9

Commit

8ec1d94

verified ·

1 Parent(s): 1b3bcac

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -207

app.py CHANGED Viewed

@@ -1,7 +1,5 @@
 import gradio as gr
-import pandas as pd
 import os
-import json
 from huggingface_hub import InferenceClient
 # ===============================
@@ -11,214 +9,17 @@ HF_TOKEN = os.getenv("HF")
 client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct", token=HF_TOKEN )
-# ===============================
-# SAFETY
-# ===============================
-BLOCKED_KEYWORDS = [
-    "import os", "import sys", "subprocess",
-    "open(", "eval(", "exec(", "__",
-    "socket", "requests"
-]
-# ===============================
-# AGENT: DECIDE MODE
-# ===============================
-def decide_mode(user_question, df):
-    prompt = f"""
-You are an expert data analysis agent.
-Dataset columns:
-{list(df.columns)}
-Decide how to answer the user's question.
-Choose ONLY ONE mode:
-- code_and_insight → requires exact computation
-- insight_only → qualitative reasoning only
-Respond ONLY in valid JSON:
-{{
-  "mode": "code_and_insight | insight_only",
-  "needs_code_visible": true | false
-}}
-Set needs_code_visible = true ONLY if the user explicitly asks for code.
-User question:
-{user_question}
-"""
-    response = ""
-    for chunk in client.chat_completion(
-        messages=[{"role": "user", "content": prompt}],
-        max_tokens=150,
-        temperature=0,
-        stream=True,
-    ):
-        if chunk.choices and chunk.choices[0].delta.content:
-            response += chunk.choices[0].delta.content
-    return response
-# ===============================
-# AGENT: CODE GEN + EXEC
-# ===============================
-def generate_and_run_code(user_question, df, retries=1):
-    column_info = {col: str(dtype) for col, dtype in df.dtypes.items()}
-    last_error = None
-    for attempt in range(retries + 1):
-        planner_prompt = f"""
-You are a Python data analyst.
-Dataset columns and types:
-{column_info}
-Rules:
-- Use pandas only
-- Dataframe name: df
-- Store final output in variable named: result
-- No explanations
-- No markdown
-- No imports
-User question:
-{user_question}
-"""
-        if attempt > 0:
-            planner_prompt += f"\nPrevious error:\n{last_error}\nFix the code."
-        code = ""
-        for chunk in client.chat_completion(
-            messages=[{"role": "user", "content": planner_prompt}],
-            max_tokens=400,
-            temperature=0.2,
-            stream=True,
-        ):
-            if chunk.choices and chunk.choices[0].delta.content:
-                code += chunk.choices[0].delta.content
-        if any(bad in code for bad in BLOCKED_KEYWORDS):
-            return None, None, "Unsafe code detected"
-        local_env = {"df": df, "result": None}
-        try:
-            exec(code, {}, local_env)
-            return code, local_env["result"], None
-        except Exception as e:
-            last_error = str(e)
-    return None, None, last_error
-# ===============================
-# CORE CHATBOT
-# ===============================
-def analyze_excel(message, history, file):
-    if file is None:
-        yield "⚠️ Please upload an Excel file first."
-        return
-    user_question = message["content"] if isinstance(message, dict) else message
-    try:
-        df = pd.read_excel(file.name, engine="openpyxl")
-        # 🧠 Decide mode
-        decision_raw = decide_mode(user_question, df)
-        try:
-            decision = json.loads(decision_raw)
-        except Exception:
-            yield "❌ Unable to interpret the request. Please rephrase."
-            return
-        mode = decision["mode"]
-        show_code = decision["needs_code_visible"]
-        # ===============================
-        # CODE + INSIGHT MODE
-        # ===============================
-        if mode == "code_and_insight":
-            yield "🧠 Running analysis…"
-            code, result, error = generate_and_run_code(
-                user_question=user_question,
-                df=df,
-                retries=1
-            )
-            if error:
-                yield f"❌ Computation failed: {error}"
-                return
-            # Build insight prompt
-            insight_prompt = f"""
-You are a senior data analyst.
-User question:
-{user_question}
-Computed result:
-{result}
-Explain the insight clearly in natural language.
-Focus on meaning and implications.
-"""
-            response = ""
-            if show_code:
-                response += f"🧾 Generated Python code:\n\n```python\n{code}\n```\n\n"
-            for chunk in client.chat_completion(
-                messages=[{"role": "user", "content": insight_prompt}],
-                max_tokens=350,
-                temperature=0.4,
-                stream=True,
-            ):
-                if chunk.choices and chunk.choices[0].delta.content:
-                    response += chunk.choices[0].delta.content
-                    yield response
-            return
-        # ===============================
-        # INSIGHT ONLY MODE
-        # ===============================
-        summary = f"""
-Rows: {len(df)}
-Columns: {list(df.columns)}
-Missing values:
-{df.isnull().sum().to_string()}
-"""
-        insight_prompt = f"""
-Dataset summary:
-{summary}
-User question:
-{user_question}
-Provide high-level analytical insights.
-Do not compute exact numbers.
-Do not generate code.
-"""
-        response = ""
-        for chunk in client.chat_completion(
-            messages=[{"role": "user", "content": insight_prompt}],
-            max_tokens=400,
-            temperature=0.4,
-            stream=True,
-        ):
-            if chunk.choices and chunk.choices[0].delta.content:
-                response += chunk.choices[0].delta.content
-                yield response
-    except Exception as e:
-        yield f"❌ Error: {str(e)}"
 # ===============================
 # UI

 import gradio as gr
 import os
 from huggingface_hub import InferenceClient
 # ===============================
 client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct", token=HF_TOKEN )
+# Problem Statement:
+# 1. Add your own HF token in the settings to get the LLM working.
+# 2. Update requirements.txt, app.py as needed.
+# 3. Develop a robust "Text-to-Code" analytical workflow hosted on a Hugging Face Space using the Qwen/Qwen2.5-7B-Instruct model.
+# The Workflow Requirements:
+# a. Code Generation (Planner): Transform natural language user queries into executable, sandboxed Python code (specifically using pandas).
+# b. Execution (Action): Securely execute the generated code on the Hugging Face Space server against the uploaded dataset.
+# c. Synthesis (Insight): Capture the raw output of the code execution and feed it back to the LLM to generate a natural language insight.
 # ===============================
 # UI