asrarbw committed on
Commit
8ec1d94
·
verified ·
1 Parent(s): 1b3bcac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -207
app.py CHANGED
@@ -1,7 +1,5 @@
1
  import gradio as gr
2
- import pandas as pd
3
  import os
4
- import json
5
  from huggingface_hub import InferenceClient
6
 
7
  # ===============================
@@ -11,214 +9,17 @@ HF_TOKEN = os.getenv("HF")
11
 
12
  client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct", token=HF_TOKEN )
13
 
 
 
 
 
 
14
 
15
- # ===============================
16
- # SAFETY
17
- # ===============================
18
- BLOCKED_KEYWORDS = [
19
- "import os", "import sys", "subprocess",
20
- "open(", "eval(", "exec(", "__",
21
- "socket", "requests"
22
- ]
23
-
24
- # ===============================
25
- # AGENT: DECIDE MODE
26
- # ===============================
27
- def decide_mode(user_question, df):
28
- prompt = f"""
29
- You are an expert data analysis agent.
30
-
31
- Dataset columns:
32
- {list(df.columns)}
33
-
34
- Decide how to answer the user's question.
35
-
36
- Choose ONLY ONE mode:
37
- - code_and_insight → requires exact computation
38
- - insight_only → qualitative reasoning only
39
-
40
- Respond ONLY in valid JSON:
41
- {{
42
- "mode": "code_and_insight | insight_only",
43
- "needs_code_visible": true | false
44
- }}
45
-
46
- Set needs_code_visible = true ONLY if the user explicitly asks for code.
47
-
48
- User question:
49
- {user_question}
50
- """
51
- response = ""
52
- for chunk in client.chat_completion(
53
- messages=[{"role": "user", "content": prompt}],
54
- max_tokens=150,
55
- temperature=0,
56
- stream=True,
57
- ):
58
- if chunk.choices and chunk.choices[0].delta.content:
59
- response += chunk.choices[0].delta.content
60
- return response
61
-
62
-
63
- # ===============================
64
- # AGENT: CODE GEN + EXEC
65
- # ===============================
66
- def generate_and_run_code(user_question, df, retries=1):
67
- column_info = {col: str(dtype) for col, dtype in df.dtypes.items()}
68
- last_error = None
69
-
70
- for attempt in range(retries + 1):
71
- planner_prompt = f"""
72
- You are a Python data analyst.
73
-
74
- Dataset columns and types:
75
- {column_info}
76
-
77
- Rules:
78
- - Use pandas only
79
- - Dataframe name: df
80
- - Store final output in variable named: result
81
- - No explanations
82
- - No markdown
83
- - No imports
84
-
85
- User question:
86
- {user_question}
87
- """
88
-
89
- if attempt > 0:
90
- planner_prompt += f"\nPrevious error:\n{last_error}\nFix the code."
91
-
92
- code = ""
93
- for chunk in client.chat_completion(
94
- messages=[{"role": "user", "content": planner_prompt}],
95
- max_tokens=400,
96
- temperature=0.2,
97
- stream=True,
98
- ):
99
- if chunk.choices and chunk.choices[0].delta.content:
100
- code += chunk.choices[0].delta.content
101
-
102
- if any(bad in code for bad in BLOCKED_KEYWORDS):
103
- return None, None, "Unsafe code detected"
104
-
105
- local_env = {"df": df, "result": None}
106
- try:
107
- exec(code, {}, local_env)
108
- return code, local_env["result"], None
109
- except Exception as e:
110
- last_error = str(e)
111
-
112
- return None, None, last_error
113
-
114
-
115
- # ===============================
116
- # CORE CHATBOT
117
- # ===============================
118
- def analyze_excel(message, history, file):
119
-
120
- if file is None:
121
- yield "⚠️ Please upload an Excel file first."
122
- return
123
-
124
- user_question = message["content"] if isinstance(message, dict) else message
125
-
126
- try:
127
- df = pd.read_excel(file.name, engine="openpyxl")
128
-
129
- # 🧠 Decide mode
130
- decision_raw = decide_mode(user_question, df)
131
-
132
- try:
133
- decision = json.loads(decision_raw)
134
- except Exception:
135
- yield "❌ Unable to interpret the request. Please rephrase."
136
- return
137
-
138
- mode = decision["mode"]
139
- show_code = decision["needs_code_visible"]
140
-
141
- # ===============================
142
- # CODE + INSIGHT MODE
143
- # ===============================
144
- if mode == "code_and_insight":
145
- yield "🧠 Running analysis…"
146
-
147
- code, result, error = generate_and_run_code(
148
- user_question=user_question,
149
- df=df,
150
- retries=1
151
- )
152
-
153
- if error:
154
- yield f"❌ Computation failed: {error}"
155
- return
156
-
157
- # Build insight prompt
158
- insight_prompt = f"""
159
- You are a senior data analyst.
160
-
161
- User question:
162
- {user_question}
163
-
164
- Computed result:
165
- {result}
166
-
167
- Explain the insight clearly in natural language.
168
- Focus on meaning and implications.
169
- """
170
-
171
- response = ""
172
- if show_code:
173
- response += f"🧾 Generated Python code:\n\n```python\n{code}\n```\n\n"
174
-
175
- for chunk in client.chat_completion(
176
- messages=[{"role": "user", "content": insight_prompt}],
177
- max_tokens=350,
178
- temperature=0.4,
179
- stream=True,
180
- ):
181
- if chunk.choices and chunk.choices[0].delta.content:
182
- response += chunk.choices[0].delta.content
183
- yield response
184
- return
185
-
186
- # ===============================
187
- # INSIGHT ONLY MODE
188
- # ===============================
189
- summary = f"""
190
- Rows: {len(df)}
191
- Columns: {list(df.columns)}
192
- Missing values:
193
- {df.isnull().sum().to_string()}
194
- """
195
-
196
- insight_prompt = f"""
197
- Dataset summary:
198
- {summary}
199
-
200
- User question:
201
- {user_question}
202
-
203
- Provide high-level analytical insights.
204
- Do not compute exact numbers.
205
- Do not generate code.
206
- """
207
-
208
- response = ""
209
- for chunk in client.chat_completion(
210
- messages=[{"role": "user", "content": insight_prompt}],
211
- max_tokens=400,
212
- temperature=0.4,
213
- stream=True,
214
- ):
215
- if chunk.choices and chunk.choices[0].delta.content:
216
- response += chunk.choices[0].delta.content
217
- yield response
218
 
219
- except Exception as e:
220
- yield f"❌ Error: {str(e)}"
221
 
 
222
 
223
  # ===============================
224
  # UI
 
1
  import gradio as gr
 
2
  import os
 
3
  from huggingface_hub import InferenceClient
4
 
5
  # ===============================
 
9
 
10
  client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct", token=HF_TOKEN )
11
 
12
+ # Problem Statement:
13
+ # 1. Add your own HF token in the settings to get the LLM working.
14
+ # 2. Update requirements.txt, app.py as needed.
15
+ # 3. Develop a robust "Text-to-Code" analytical workflow hosted on a Hugging Face Space using the Qwen/Qwen2.5-7B-Instruct model.
16
+ # The Workflow Requirements:
17
 
18
+ # a. Code Generation (Planner): Transform natural language user queries into executable, sandboxed Python code (specifically using pandas).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ # b. Execution (Action): Securely execute the generated code on the Hugging Face Space server against the uploaded dataset.
 
21
 
22
+ # c. Synthesis (Insight): Capture the raw output of the code execution and feed it back to the LLM to generate a natural language insight.
23
 
24
  # ===============================
25
  # UI