Spaces:

PD03
/

Salt

Sleeping

App Files Files Community

PD03 commited on Sep 1

Commit

e135aef

verified ·

1 Parent(s): bc89306

Create app.py

Browse files

Files changed (1) hide show

app.py +308 -0

app.py ADDED Viewed

	@@ -0,0 +1,308 @@

+import gradio as gr
+import pandas as pd
+import duckdb
+from datasets import load_dataset
+import openai
+import os
+from typing import Dict, List, Any
+import json
+class SALTAnalytics:
+    def __init__(self):
+        """Initialize SALT Analytics"""
+        self.con = duckdb.connect(':memory:')
+        self.data_loaded = False
+        self.schema_info = ""
+        self.openai_client = None
+    def setup_openai(self, api_key: str):
+        """Setup OpenAI client with API key"""
+        try:
+            self.openai_client = openai.OpenAI(api_key=api_key)
+            return True
+        except Exception as e:
+            return False
+    def load_salt_dataset(self):
+        """Load SAP SALT dataset from Hugging Face into DuckDB"""
+        if self.data_loaded:
+            return "Dataset already loaded!"
+        try:
+            # Load dataset with error handling for HF Spaces
+            dataset = load_dataset("SAP/SALT", "joined_table", split="train", streaming=False)
+            df = dataset.to_pandas()
+            # Sample data for demo if dataset is too large
+            if len(df) > 100000:  # Limit for HF Spaces memory
+                df = df.sample(n=50000, random_state=42)
+            # Load into DuckDB
+            self.con.execute("CREATE TABLE salt_data AS SELECT * FROM df")
+            # Get schema information
+            schema_result = self.con.execute("DESCRIBE salt_data").fetchall()
+            self.schema_info = "\n".join([f"{col[0]}: {col[1]}" for col in schema_result])
+            self.data_loaded = True
+            return f"✅ Successfully loaded {len(df)} records into DuckDB"
+        except Exception as e:
+            return f"❌ Error loading dataset: {str(e)}"
+    def get_predefined_insights(self):
+        """Generate predefined analytical insights"""
+        if not self.data_loaded:
+            return "Please load the dataset first"
+        try:
+            insights = {}
+            # Sales Office Performance
+            insights['Sales Office Performance'] = self.con.execute("""
+                SELECT SALESOFFICE,
+                       COUNT(*) as total_orders,
+                       COUNT(DISTINCT CUSTOMERID) as unique_customers
+                FROM salt_data
+                GROUP BY SALESOFFICE
+                ORDER BY total_orders DESC
+                LIMIT 10
+            """).fetchdf()
+            # Payment Terms Distribution
+            insights['Payment Terms Distribution'] = self.con.execute("""
+                SELECT CUSTOMERPAYMENTTERMS,
+                       COUNT(*) as frequency,
+                       ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 2) as percentage
+                FROM salt_data
+                GROUP BY CUSTOMERPAYMENTTERMS
+                ORDER BY frequency DESC
+            """).fetchdf()
+            # Shipping Conditions Analysis
+            insights['Shipping Conditions'] = self.con.execute("""
+                SELECT SHIPPINGCONDITION,
+                       COUNT(*) as order_count,
+                       COUNT(DISTINCT PLANT) as plants_served
+                FROM salt_data
+                GROUP BY SHIPPINGCONDITION
+                ORDER BY order_count DESC
+            """).fetchdf()
+            return insights
+        except Exception as e:
+            return f"Error generating insights: {str(e)}"
+    def natural_language_query(self, question: str, api_key: str):
+        """Convert natural language to SQL and execute"""
+        if not self.data_loaded:
+            return "Please load the dataset first"
+        if not api_key:
+            return "Please provide OpenAI API key"
+        try:
+            # Setup OpenAI client
+            client = openai.OpenAI(api_key=api_key)
+            prompt = f"""
+            You are a SQL expert analyzing SAP SALT dataset. The database has a table called 'salt_data' with this schema:
+            {self.schema_info}
+            The SALT dataset contains SAP ERP sales order data. Key fields:
+            - SALESOFFICE, SALESGROUP: Sales organization
+            - CUSTOMERID: Customer identifier
+            - CUSTOMERPAYMENTTERMS: Payment terms (Net30, Net45, etc.)
+            - SHIPPINGCONDITION, SHIPPINGPOINT: Shipping logistics
+            - PLANT: Manufacturing location
+            - HEADERINCOTERMSCLASSIFICATION, ITEMINCOTERMSCLASSIFICATION: Trade terms
+            Convert this question to a DuckDB SQL query: "{question}"
+            Return ONLY the SQL query, no explanation. Limit results to 20 rows.
+            """
+            response = client.chat.completions.create(
+                model="gpt-4",
+                messages=[{"role": "user", "content": prompt}],
+                temperature=0.1
+            )
+            sql_query = response.choices[0].message.content.strip()
+            # Clean SQL query
+            if sql_query.startswith("```
+                sql_query = sql_query[6:-3]
+            elif sql_query.startswith("```"):
+                sql_query = sql_query[3:-3]
+            # Execute query
+            result_df = self.con.execute(sql_query).fetchdf()
+            # Get explanation
+            explanation_prompt = f"""
+            Question: {question}
+            Results: {result_df.head(10).to_string()}
+            Provide a clear business explanation of these SAP ERP results in 2-3 sentences.
+            """
+            explanation_response = client.chat.completions.create(
+                model="gpt-4",
+                messages=[{"role": "user", "content": explanation_prompt}],
+                temperature=0.3
+            )
+            explanation = explanation_response.choices[0].message.content
+            return f"**SQL Query:**\n``````\n\n**Results:**\n{result_df.to_string(index=False)}\n\n**Explanation:**\n{explanation}"
+        except Exception as e:
+            return f"Error: {str(e)}"
+# Initialize analytics
+analytics = SALTAnalytics()
+def load_dataset_interface():
+    """Interface for loading dataset"""
+    result = analytics.load_salt_dataset()
+    return result
+def show_insights_interface():
+    """Interface for showing insights"""
+    insights = analytics.get_predefined_insights()
+    if isinstance(insights, str):
+        return insights
+    output = "# 📊 SAP SALT Dataset Insights\n\n"
+    for title, df in insights.items():
+        output += f"## {title}\n\n"
+        output += df.to_markdown(index=False)
+        output += "\n\n---\n\n"
+    return output
+def qa_interface(question: str, api_key: str):
+    """Interface for Q&A functionality"""
+    if not question.strip():
+        return "Please enter a question"
+    result = analytics.natural_language_query(question, api_key)
+    return result
+# Sample questions for the interface
+sample_questions = [
+    "Which sales office has the most customers?",
+    "What are the most common payment terms?",
+    "Show me shipping conditions by plant",
+    "Which customers have the highest number of orders?",
+    "What's the distribution of sales groups?"
+]
+# Create Gradio interface
+with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🚀 SAP SALT Dataset Analytics Demo
+    ## Open Source Analytics + AI for SAP ERP
+    This demo showcases how open source tools (DuckDB + OpenAI) can generate massive value for enterprises running SAP ERP systems.
+    """)
+    with gr.Tab("📥 Load Dataset"):
+        gr.Markdown("### Load SAP SALT Dataset from Hugging Face")
+        load_btn = gr.Button("Load SALT Dataset", variant="primary")
+        load_output = gr.Textbox(label="Status", lines=3)
+        load_btn.click(
+            fn=load_dataset_interface,
+            outputs=load_output
+        )
+    with gr.Tab("📈 Insights"):
+        gr.Markdown("### Pre-built Analytics Insights")
+        insights_btn = gr.Button("Generate Insights", variant="primary")
+        insights_output = gr.Markdown()
+        insights_btn.click(
+            fn=show_insights_interface,
+            outputs=insights_output
+        )
+    with gr.Tab("🤖 AI Q&A"):
+        gr.Markdown("### Ask Questions in Natural Language")
+        with gr.Row():
+            with gr.Column(scale=3):
+                api_key_input = gr.Textbox(
+                    label="OpenAI API Key",
+                    type="password",
+                    placeholder="Enter your OpenAI API key"
+                )
+                question_input = gr.Textbox(
+                    label="Your Question",
+                    placeholder="e.g., Which sales office handles the most customers?",
+                    lines=2
+                )
+                sample_dropdown = gr.Dropdown(
+                    choices=sample_questions,
+                    label="Or choose a sample question",
+                    value=None
+                )
+                ask_btn = gr.Button("Get Answer", variant="primary")
+            with gr.Column(scale=4):
+                qa_output = gr.Markdown()
+        # Update question input when sample is selected
+        sample_dropdown.change(
+            fn=lambda x: x if x else "",
+            inputs=sample_dropdown,
+            outputs=question_input
+        )
+        ask_btn.click(
+            fn=qa_interface,
+            inputs=[question_input, api_key_input],
+            outputs=qa_output
+        )
+    with gr.Tab("ℹ️ About"):
+        gr.Markdown("""
+        ### About This Demo
+        **Dataset**: SAP SALT (Sales Autocompletion Linked Business Tables)
+        - Real SAP S/4HANA sales order data
+        - 4 linked tables: Sales Documents, Items, Customers, Addresses
+        - 8 classification targets for ML models
+        **Technology Stack**:
+        - **DuckDB**: High-performance analytics database
+        - **OpenAI GPT-4**: Natural language to SQL conversion
+        - **Hugging Face**: Dataset hosting and deployment
+        - **Gradio**: Interactive web interface
+        **Business Value**:
+        - Automate sales order completion (70-80% accuracy)
+        - Optimize customer-to-sales office assignments
+        - Predict shipping and payment preferences
+        - Generate actionable business insights
+        **Open Source Benefits**:
+        - Zero licensing costs vs. proprietary SAP analytics
+        - Full customization and control
+        - Community-driven improvements
+        - Easy integration with existing systems
+        """)
+if __name__ == "__main__":
+    demo.launch()