PD03 committed on
Commit
e135aef
·
verified ·
1 Parent(s): bc89306

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +308 -0
app.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import logging
import os
from typing import Dict, List, Any

import duckdb
import gradio as gr
import openai
import pandas as pd
from datasets import load_dataset
9
+
10
class SALTAnalytics:
    """Analytics over the SAP SALT dataset held in an in-memory DuckDB database.

    The class loads the dataset from Hugging Face into a DuckDB table named
    ``salt_data``, runs a fixed set of aggregate queries, and answers free-form
    questions by asking an OpenAI chat model to write the SQL.

    Every public method reports failure by returning a human-readable string
    instead of raising, because the results are shown directly in the UI.
    """

    # Leading keywords a model-generated statement may start with.  Anything
    # else (DROP, COPY, ATTACH, INSTALL, ...) is refused before execution.
    _READ_ONLY_KEYWORDS = ("select", "with", "describe", "show", "summarize")

    # Language tags that may directly follow an opening Markdown code fence.
    _FENCE_LANGUAGE_TAGS = ("sql", "duckdb")

    def __init__(self):
        """Open the in-memory DuckDB connection and reset all state."""
        self.con = duckdb.connect(':memory:')
        self.data_loaded = False
        # "column: type" lines describing salt_data; filled in by load_salt_dataset().
        self.schema_info = ""
        self.openai_client = None

    def setup_openai(self, api_key: str) -> bool:
        """Create an OpenAI client for ``api_key`` and store it on the instance.

        Returns:
            True when the client was created, False otherwise.  On failure the
            previously stored client (if any) is left untouched.
        """
        try:
            self.openai_client = openai.OpenAI(api_key=api_key)
        except Exception as exc:
            # Log only the exception type so the key can never end up in logs.
            logging.getLogger(__name__).warning(
                "Could not create OpenAI client: %s", type(exc).__name__
            )
            return False
        return True

    def load_salt_dataset(self, max_rows: int = 100000, sample_rows: int = 50000) -> str:
        """Load the SAP SALT dataset from Hugging Face into DuckDB.

        Args:
            max_rows: Datasets with more rows than this are down-sampled.
            sample_rows: Number of rows kept when down-sampling.

        Returns:
            A status message: success with the number of loaded rows, a notice
            that the data is already loaded, or an error description.
        """
        if self.data_loaded:
            return "Dataset already loaded!"

        try:
            dataset = load_dataset("SAP/SALT", "joined_table", split="train", streaming=False)
            df = dataset.to_pandas()

            # Keep the demo within a limited memory budget.  The fixed seed
            # makes the sample reproducible between restarts.
            if len(df) > max_rows:
                df = df.sample(n=sample_rows, random_state=42)

            # DuckDB resolves the name `df` in the SQL text to the local pandas
            # DataFrame (replacement scan), so the variable must keep this name.
            # OR REPLACE lets a retry succeed after a partially failed load.
            self.con.execute("CREATE OR REPLACE TABLE salt_data AS SELECT * FROM df")

            # Cache the schema as text; it is embedded in the LLM prompt later.
            schema_result = self.con.execute("DESCRIBE salt_data").fetchall()
            self.schema_info = "\n".join(f"{col[0]}: {col[1]}" for col in schema_result)

            self.data_loaded = True
            return f"✅ Successfully loaded {len(df)} records into DuckDB"

        except Exception as e:
            return f"❌ Error loading dataset: {str(e)}"

    def get_predefined_insights(self):
        """Run the built-in aggregate queries against ``salt_data``.

        Returns:
            A dict mapping an insight title to its result DataFrame, or a
            string describing why no insights could be produced.
        """
        if not self.data_loaded:
            return "Please load the dataset first"

        try:
            insights = {}

            # Sales Office Performance
            insights['Sales Office Performance'] = self.con.execute("""
                SELECT SALESOFFICE,
                       COUNT(*) as total_orders,
                       COUNT(DISTINCT CUSTOMERID) as unique_customers
                FROM salt_data
                GROUP BY SALESOFFICE
                ORDER BY total_orders DESC
                LIMIT 10
            """).fetchdf()

            # Payment Terms Distribution
            insights['Payment Terms Distribution'] = self.con.execute("""
                SELECT CUSTOMERPAYMENTTERMS,
                       COUNT(*) as frequency,
                       ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 2) as percentage
                FROM salt_data
                GROUP BY CUSTOMERPAYMENTTERMS
                ORDER BY frequency DESC
            """).fetchdf()

            # Shipping Conditions Analysis
            insights['Shipping Conditions'] = self.con.execute("""
                SELECT SHIPPINGCONDITION,
                       COUNT(*) as order_count,
                       COUNT(DISTINCT PLANT) as plants_served
                FROM salt_data
                GROUP BY SHIPPINGCONDITION
                ORDER BY order_count DESC
            """).fetchdf()

            return insights

        except Exception as e:
            return f"Error generating insights: {str(e)}"

    @classmethod
    def _extract_sql(cls, raw) -> str:
        """Return the bare SQL statement from a model reply.

        Removes an optional surrounding Markdown code fence (with or without a
        language tag, with or without the closing fence) and a trailing
        semicolon.  ``None`` and empty replies yield an empty string.
        """
        text = (raw or "").strip()
        if text.startswith("```"):
            text = text[3:]
            if text.endswith("```"):
                text = text[:-3]
            lowered = text.lower()
            for tag in cls._FENCE_LANGUAGE_TAGS:
                rest = text[len(tag):]
                # Only treat the prefix as a tag when it is a whole word, so a
                # statement is never truncated by accident.
                if lowered.startswith(tag) and (not rest or rest[0].isspace()):
                    text = rest
                    break
        return text.strip().rstrip(";").strip()

    @classmethod
    def _is_read_only_query(cls, sql: str) -> bool:
        """Return True when ``sql`` is one statement starting with a read-only keyword."""
        # A remaining semicolon means several statements (or a literal that
        # contains one); refuse rather than risk running a trailing DROP.
        if ";" in sql:
            return False
        code_lines = [
            line for line in sql.splitlines()
            if line.strip() and not line.strip().startswith("--")
        ]
        if not code_lines:
            return False
        head = code_lines[0].lstrip(" \t(").lower()
        for keyword in cls._READ_ONLY_KEYWORDS:
            if head.startswith(keyword):
                following = head[len(keyword):len(keyword) + 1]
                if not (following.isalnum() or following == "_"):
                    return True
        return False

    def natural_language_query(self, question: str, api_key: str) -> str:
        """Convert a natural-language question to SQL, run it, and explain the result.

        Two chat-completion requests are made with ``api_key``: one to produce
        the DuckDB query and one to summarize the first ten result rows.

        Returns:
            Markdown containing the SQL, the result table and the explanation,
            or a message describing what went wrong.
        """
        if not self.data_loaded:
            return "Please load the dataset first"

        if not api_key:
            return "Please provide OpenAI API key"

        try:
            # A client is created per call so that a caller's key is never
            # stored on the shared instance.
            client = openai.OpenAI(api_key=api_key)

            prompt = f"""
            You are a SQL expert analyzing SAP SALT dataset. The database has a table called 'salt_data' with this schema:

            {self.schema_info}

            The SALT dataset contains SAP ERP sales order data. Key fields:
            - SALESOFFICE, SALESGROUP: Sales organization
            - CUSTOMERID: Customer identifier
            - CUSTOMERPAYMENTTERMS: Payment terms (Net30, Net45, etc.)
            - SHIPPINGCONDITION, SHIPPINGPOINT: Shipping logistics
            - PLANT: Manufacturing location
            - HEADERINCOTERMSCLASSIFICATION, ITEMINCOTERMSCLASSIFICATION: Trade terms

            Convert this question to a DuckDB SQL query: "{question}"

            Return ONLY the SQL query, no explanation. Limit results to 20 rows.
            """

            response = client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1
            )

            sql_query = self._extract_sql(response.choices[0].message.content)

            # The statement comes from a language model driven by user input;
            # never run anything that could modify the shared database.
            # NOTE(review): a SELECT can still reach DuckDB table functions
            # such as file readers - consider disabling external access.
            if not self._is_read_only_query(sql_query):
                return (
                    "Error: the generated SQL is not a single read-only query "
                    f"and was not executed.\n```sql\n{sql_query}\n```"
                )

            result_df = self.con.execute(sql_query).fetchdf()

            # Only the first rows are sent back to the model to bound prompt size.
            explanation_prompt = f"""
            Question: {question}
            Results: {result_df.head(10).to_string()}

            Provide a clear business explanation of these SAP ERP results in 2-3 sentences.
            """

            explanation_response = client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": explanation_prompt}],
                temperature=0.3
            )

            explanation = explanation_response.choices[0].message.content

            # Fence the table as well: its column alignment relies on a
            # monospace font once rendered as Markdown.
            results_text = result_df.to_string(index=False)
            return (
                f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n"
                f"**Results:**\n```\n{results_text}\n```\n\n"
                f"**Explanation:**\n{explanation}"
            )

        except Exception as e:
            return f"Error: {str(e)}"
164
+
165
# Single shared analytics instance: every UI callback below uses this one
# object, and therefore the same in-memory DuckDB connection.
analytics = SALTAnalytics()
167
+
168
def load_dataset_interface():
    """Load the dataset through the shared analytics instance.

    Returns:
        The status message produced by ``SALTAnalytics.load_salt_dataset``.
    """
    return analytics.load_salt_dataset()
172
+
173
def show_insights_interface():
    """Render the predefined insights as one Markdown document.

    Returns:
        Markdown with one section per insight, or the plain message returned
        by ``get_predefined_insights`` when no insights are available.
    """
    insights = analytics.get_predefined_insights()

    # A string means "not loaded" or "query failed"; show it unchanged.
    if isinstance(insights, str):
        return insights

    sections = ["# 📊 SAP SALT Dataset Insights\n\n"]

    for title, df in insights.items():
        try:
            table = df.to_markdown(index=False)
        except ImportError:
            # DataFrame.to_markdown needs the optional `tabulate` package;
            # fall back to a fenced plain-text table when it is missing.
            table = f"```\n{df.to_string(index=False)}\n```"
        sections.append(f"## {title}\n\n{table}\n\n---\n\n")

    return "".join(sections)
188
+
189
def qa_interface(question: str, api_key: str):
    """Validate the Q&A inputs and run the natural-language query.

    Args:
        question: The user's question; ``None`` or blank text is rejected.
        api_key: OpenAI API key; surrounding whitespace is removed.

    Returns:
        The Markdown answer, or a message asking for the missing input.
    """
    # The UI may deliver None for a cleared field, so normalise before
    # calling str methods.
    cleaned_question = (question or "").strip()
    if not cleaned_question:
        return "Please enter a question"

    # Pasted keys frequently carry a trailing space or newline.
    cleaned_key = (api_key or "").strip()
    return analytics.natural_language_query(cleaned_question, cleaned_key)
196
+
197
# Sample questions offered in the Q&A tab's dropdown; picking one copies it
# into the question box.
sample_questions = [
    "Which sales office has the most customers?",
    "What are the most common payment terms?",
    "Show me shipping conditions by plant",
    "Which customers have the highest number of orders?",
    "What's the distribution of sales groups?",
]
205
+
206
# Create Gradio interface.
# The Markdown literals are written flush-left on purpose: leading indentation
# inside the strings could otherwise be rendered as Markdown code blocks.
with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
# 🚀 SAP SALT Dataset Analytics Demo
## Open Source Analytics + AI for SAP ERP

This demo showcases how open source tools (DuckDB + OpenAI) can generate massive value for enterprises running SAP ERP systems.
""")

    # Tab 1: load the dataset into DuckDB and show the status message.
    with gr.Tab("📥 Load Dataset"):
        gr.Markdown("### Load SAP SALT Dataset from Hugging Face")

        load_btn = gr.Button("Load SALT Dataset", variant="primary")
        load_output = gr.Textbox(label="Status", lines=3)

        load_btn.click(
            fn=load_dataset_interface,
            outputs=load_output
        )

    # Tab 2: the predefined aggregate reports.
    with gr.Tab("📈 Insights"):
        gr.Markdown("### Pre-built Analytics Insights")

        insights_btn = gr.Button("Generate Insights", variant="primary")
        insights_output = gr.Markdown()

        insights_btn.click(
            fn=show_insights_interface,
            outputs=insights_output
        )

    # Tab 3: natural-language questions answered through OpenAI + DuckDB.
    with gr.Tab("🤖 AI Q&A"):
        gr.Markdown("### Ask Questions in Natural Language")

        with gr.Row():
            with gr.Column(scale=3):
                api_key_input = gr.Textbox(
                    label="OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key"
                )

                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="e.g., Which sales office handles the most customers?",
                    lines=2
                )

                sample_dropdown = gr.Dropdown(
                    choices=sample_questions,
                    label="Or choose a sample question",
                    value=None
                )

                ask_btn = gr.Button("Get Answer", variant="primary")

            with gr.Column(scale=4):
                qa_output = gr.Markdown()

        # Update question input when sample is selected; a cleared dropdown
        # yields an empty question.
        sample_dropdown.change(
            fn=lambda x: x if x else "",
            inputs=sample_dropdown,
            outputs=question_input
        )

        ask_btn.click(
            fn=qa_interface,
            inputs=[question_input, api_key_input],
            outputs=qa_output
        )

    # Tab 4: static description of the demo.
    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
### About This Demo

**Dataset**: SAP SALT (Sales Autocompletion Linked Business Tables)
- Real SAP S/4HANA sales order data
- 4 linked tables: Sales Documents, Items, Customers, Addresses
- 8 classification targets for ML models

**Technology Stack**:
- **DuckDB**: High-performance analytics database
- **OpenAI GPT-4**: Natural language to SQL conversion
- **Hugging Face**: Dataset hosting and deployment
- **Gradio**: Interactive web interface

**Business Value**:
- Automate sales order completion (70-80% accuracy)
- Optimize customer-to-sales office assignments
- Predict shipping and payment preferences
- Generate actionable business insights

**Open Source Benefits**:
- Zero licensing costs vs. proprietary SAP analytics
- Full customization and control
- Community-driven improvements
- Easy integration with existing systems
""")
306
+
307
# Start the web app only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    demo.launch()