Spaces:

lol040604lol
/

pavi

Sleeping

App Files Files Community

lol040604lol committed on Mar 8

Commit

40e49d9

verified ·

1 Parent(s): 5471b51

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -263

app.py CHANGED Viewed

@@ -8,14 +8,11 @@ import whisper
 import torch
 import requests
 from datetime import datetime
-from sklearn.linear_model import LinearRegression
 import numpy as np
 from dotenv import load_dotenv
 import os
-import librosa
-import tempfile
 import soundfile as sf
-from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
 # Load .env file
 load_dotenv()
@@ -27,7 +24,7 @@ model_path = os.getenv('MODEL_PATH')
 # Load product and objection data
 @st.cache_resource
 def load_data():
-    product_data = pd.read_csv("product_data.csv")  # Use relative path
     objections_data = pd.read_csv("objections_data.csv")
     return product_data, objections_data
@@ -39,26 +36,12 @@ objections = objections_data['objection'].tolist()
 responses = objections_data['response'].tolist()
 # Initialize models
-RTC_CONFIG = {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
 def initialize_models():
-    """
-    Initializes the SentenceTransformer and Whisper models.
-    Returns:
-        model: SentenceTransformer instance for embeddings.
-        whisper_model: Whisper model instance for audio transcription.
-    """
     try:
-        # Check for GPU availability
         device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"Using device: {device}")
-        # Load SentenceTransformer model
         model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
-        # Load Whisper model
         whisper_model = whisper.load_model("base", device=device)
         print("Models successfully initialized.")
@@ -67,7 +50,6 @@ def initialize_models():
         print(f"Error initializing models: {e}")
         raise
-# Initialize and store models
 model, whisper_model = initialize_models()
 # Create embeddings and FAISS indices
@@ -85,256 +67,47 @@ def create_indices():
     return product_index, objection_index
 product_index, objection_index = create_indices()
-# Configuration for WebRTC
-RTC_CONFIG = RTCConfiguration({"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]})
-from io import BytesIO
-import soundfile as sf
-def process_audio(audio_data, sample_rate=16000):
     try:
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
-            sf.write(temp_audio_file.name, audio_data, samplerate=sample_rate)
-            # Transcribe audio using Whisper
-            transcription = whisper_model.transcribe(temp_audio_file.name)["text"]
-        os.unlink(temp_audio_file.name)  # Cleanup temporary file
         return transcription
     except Exception as e:
-        st.error(f"Error processing audio: {e}")
         return None
-class AudioProcessor:
-    def __init__(self, whisper_model):
-        self.whisper_model = whisper_model
-        self.transcription = ""
-    def process_audio(self, audio_data):
-        try:
-            # Convert raw audio bytes to a WAV file
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
-                sf.write(temp_audio_file.name, audio_data, samplerate=16000)
-                # Transcribe audio using Whisper
-                transcription = self.whisper_model.transcribe(temp_audio_file.name)["text"]
-            os.unlink(temp_audio_file.name)  # Clean up the temporary file
-            return transcription
-        except Exception as e:
-            st.error(f"Error processing audio: {e}")
-            return None
-audio_processor = AudioProcessor(whisper_model)
-import av
-from streamlit_webrtc import webrtc_streamer, WebRtcMode
-def audio_callback(frame: av.AudioFrame):
-    audio = frame.to_ndarray()
-    transcription = process_audio(audio)
-    if transcription:
-        st.session_state.transcription = transcription
-webrtc_streamer(
-    key="speech-to-text",
-    mode=WebRtcMode.SENDRECV,
-    rtc_configuration=RTC_CONFIG,
-    media_stream_constraints={"audio": True, "video": False},
-    async_processing=True,
-    audio_processor_factory=audio_callback
-)
-# Initialize audio stream
-# Hugging Face API for sentiment analysis
-API_URL = "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english"
-API_KEY = api_key
-headers = {"Authorization": f"Bearer {API_KEY}"}
-def analyze_sentiment(text):
-    payload = {"inputs": text}
-    response = requests.post(API_URL, headers=headers, json=payload)
-    if response.status_code == 200:
-        result = response.json()
-        sentiments = result[0]
-        if len(sentiments) > 0:
-            best_sentiment = max(sentiments, key=lambda x: x['score'])
-            return best_sentiment
-        else:
-            return {"label": "ERROR", "score": 0.0}
-    return {"label": "ERROR", "score": 0.0}
-def recommend_products(query):
-    query_embedding = model.encode([query])
-    distances, indices = product_index.search(query_embedding, 3)
-    return [(product_titles[i], product_descriptions[i]) for i in indices[0]]
-def handle_objection(query):
-    query_embedding = model.encode([query])
-    distances, indices = objection_index.search(query_embedding, 1)
-    idx = indices[0][0]
-    return objections[idx], responses[idx]
-# Function to save session data to a JSON file
-def save_session_data(session_data):
-    with open("session_data.json", "w") as f:
-        json.dump(session_data, f)
 # Streamlit UI
-st.title("Real-Time Product Recommendation & Sentiment Analysis")
-session_data = {
-    "interactions": [],
-    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-}
-if st.button("Stop Listening"):
-    st.info("Processing session data...")
-    # Load session data from the JSON file
-    with open("session_data.json", "r") as f:
-        session_data = json.load(f)
-    # Import and analyze data using dashboard.py
-    from dashboard import analyze_data  # Ensure dashboard.py is in the same directory
-    analysis_results = analyze_data(session_data)
-    # Initialize variables for the summary
-    summary_data = []
-    all_recommendations = []
-    objection_summary = []
-    # Process each interaction in the session data
-    for i, interaction in enumerate(session_data["interactions"]):
-        # Extract relevant data
-        customer_transcription = interaction['transcription']
-        sentiment_label = interaction['sentiment']['label']
-        product_recommendations = [rec[1] for rec in interaction['product_recommendations']]
-        objection = interaction.get('objection_handling', None)
-        # Build the narrative for each interaction
-        if i == 0:
-            # Start of the call
-            summary_data.append(f"When the call started, the customer was {sentiment_label}. ")
-        else:
-            # Progression of the conversation
-            summary_data.append(f"Then, the customer's tone shifted to {sentiment_label}. ")
-        # Add product recommendations to the summary
-        summary_data.append(f"We provided recommendations: {', '.join(product_recommendations)}. ")
-        # Add objection handling if applicable
-        #if objection:
-        #    summary_data.append(f"Objection: {objection['objection']}. Response: {objection['response']}. ")
-        # Collect all recommendations and objections for analysis
-        #all_recommendations.extend(product_recommendations)
-        #if objection:
-        #    objection_summary.append(f"Objection: {objection['objection']}, Response: {objection['response']}")
-    # Combine the summary data into one long narrative
-    narrative_summary = " ".join(summary_data)
-    overall_sentiment = (
-        "Overall sentiment trends are depicted in the Call Summary Table and Sentiment Trends graph. "
-        "Explore Sentiment Predictions below to anticipate the customer's future interests."
-    )
-    # Debug: Verify narrative summary
-    print("Narrative Summary:", narrative_summary)
-    print("All Recommendations:", all_recommendations)
-    print("Objection Summary:", objection_summary)
-    # Load BART model and tokenizer for summarization
-    from transformers import BartForConditionalGeneration, BartTokenizer
-    def load_bart_model():
-        model_name = "facebook/bart-large-cnn"  # Pre-trained BART model for summarization
-        model = BartForConditionalGeneration.from_pretrained(model_name)
-        tokenizer = BartTokenizer.from_pretrained(model_name)
-        return model, tokenizer
-    model, tokenizer = load_bart_model()
-    # Function to generate summary using BART
-    def generate_summary(text, model, tokenizer):
-        inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=1024, truncation=True)
-        summary_ids = model.generate(inputs, max_length=200, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)
-        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-        return summary
-    # Generate the post-call summary
-    call_summary = generate_summary(narrative_summary, model, tokenizer)
-    # Display the post-call summary
-    st.subheader("Post-Call Summary")
-    st.write(f"Session Timestamp: {analysis_results['timestamp']}")
-    st.write("Debug Narrative Summary:", narrative_summary)
-    # Display the summary table
-    st.subheader("Call Summary Table")
-    st.dataframe(analysis_results["summary_table"])
-    # Display the sentiment trends chart
-    st.subheader("Sentiment Trends")
-    st.pyplot(analysis_results["sentiment_chart"])
-    # Display product recommendation trends
-    st.subheader("Top Product Recommendations")
-    st.pyplot(analysis_results["recommendation_chart"])
-    # Display sentiment predictions (if available)
-    st.subheader("Sentiment Predictions")
-    sentiment_predictions = analysis_results["sentiment_predictions"]
-    if isinstance(sentiment_predictions, str):  # Handle insufficient data case
-        st.write(sentiment_predictions)
-    else:
-        st.line_chart(sentiment_predictions)
-    # Display the word cloud for transcription topics
-    st.subheader("Transcription Word Cloud")
-    st.pyplot(analysis_results["wordcloud"])
-    # Display actionable recommendations
-    st.subheader("Actionable Recommendations")
-    for recommendation in analysis_results["actionable_recommendations"]:
-        st.write(f"- {recommendation}")
-if "session_data" not in st.session_state:
-    st.session_state["session_data"] = {"interactions": [], "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
-session_data = st.session_state["session_data"]
-import streamlit as st
-import tempfile
-import os
-import soundfile as sf
-import json
-import tempfile
-import os
-import soundfile as sf
-import json
-if "listening" not in st.session_state:
-    st.session_state.listening = False
-if "transcription" not in st.session_state:
-    st.session_state.transcription = ""
-if st.button("Start Listening"):
-    st.session_state.listening = True
-if st.session_state.transcription:
-    st.subheader("Transcribed Text:")
-    st.write(st.session_state.transcription)
-if st.session_state.listening:
-    st.info("Listening... Speak into the microphone.")
-    webrtc_streamer(
-        key="example",
-        mode=WebRtcMode.SENDRECV,
-        rtc_configuration=RTC_CONFIG,
-        media_stream_constraints={"audio": True, "video": False},
-        audio_frame_callback=audio_callback,
-        async_processing=True,
-    )

 import torch
 import requests
 from datetime import datetime
 import numpy as np
 from dotenv import load_dotenv
 import os
 import soundfile as sf
+import tempfile
 # Load .env file
 load_dotenv()
 # Load product and objection data
 @st.cache_resource
 def load_data():
+    product_data = pd.read_csv("product_data.csv")
     objections_data = pd.read_csv("objections_data.csv")
     return product_data, objections_data
 responses = objections_data['response'].tolist()
 # Initialize models
 def initialize_models():
     try:
         device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"Using device: {device}")
         model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
         whisper_model = whisper.load_model("base", device=device)
         print("Models successfully initialized.")
         print(f"Error initializing models: {e}")
         raise
 model, whisper_model = initialize_models()
 # Create embeddings and FAISS indices
     return product_index, objection_index
 product_index, objection_index = create_indices()
+# Process recorded audio file
+def process_audio_file(uploaded_file):
     try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
+            temp_audio.write(uploaded_file.read())
+            temp_audio_path = temp_audio.name
+        transcription = whisper_model.transcribe(temp_audio_path)["text"]
+        os.unlink(temp_audio_path)  # Cleanup temporary file
         return transcription
     except Exception as e:
+        st.error(f"Error processing audio file: {e}")
         return None
 # Streamlit UI
+st.title("Recorded Audio Product Recommendation & Sentiment Analysis")
+uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "m4a"])
+if uploaded_file is not None:
+    st.audio(uploaded_file, format='audio/wav')
+    transcription = process_audio_file(uploaded_file)
+    if transcription:
+        st.subheader("Transcription:")
+        st.write(transcription)
+        sentiment_result = requests.post(
+            "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english",
+            headers={"Authorization": f"Bearer {api_key}"},
+            json={"inputs": transcription}
+        ).json()
+        if sentiment_result:
+            st.subheader("Sentiment Analysis:")
+            st.write(sentiment_result[0])
+        recommendations = model.encode([transcription])
+        distances, indices = product_index.search(recommendations, 3)
+        recommended_products = [(product_titles[i], product_descriptions[i]) for i in indices[0]]
+        st.subheader("Product Recommendations:")
+        for title, desc in recommended_products:
+            st.write(f"**{title}**: {desc}")