Update app.py
app.py CHANGED
@@ -1,155 +1,139 @@
-# app.py
-
 import gradio as gr
-from sentence_transformers import SentenceTransformer
-import …
-import …
-
-# --- …
…
-    return chunks
-
-def process_chat_request(user_question, chat_history, state_data):
     """
-    …
     """
…
-    if not all([index, chunks]):
-        raise gr.Error("File index is missing. Please restart by uploading a file.")
-    if not user_question:
-        raise gr.Error("Please enter a question.")
 
     try:
-        # …
…
-        Based on the following context, provide a detailed answer to the user's question.
-
-        {context}
-        ---
…
-            max_length=512,
-            num_beams=4,
-            temperature=0.1
-        )
-        bot_response = result[0]['generated_text']
…
-    with gr.Column(visible=False) as chat_page:
-        gr.Markdown("<h1 style='text-align: center;'>Chat with your Document</h1>")
-        chatbot_ui = gr.Chatbot(height=600, bubble_full_width=False)
-        with gr.Row():
-            user_input_box = gr.Textbox(placeholder="Ask a question about your file...", scale=5)
-            submit_button = gr.Button("Send", variant="primary", scale=1)
-
-    def go_to_chat(current_state, chat_file, progress=gr.Progress()):
-        if chat_file is None:
-            raise gr.Error("A file must be uploaded.")
-
-        progress(0, desc="Reading file...")
-        with open(chat_file.name, 'r', encoding='utf-8') as f:
-            content = f.read()
-
-        progress(0.2, desc="Chunking text...")
-        chunks = chunk_text(content)
-
-        progress(0.5, desc="Creating embeddings... (This might take a moment)")
-        embeddings = embedder.encode(chunks, show_progress_bar=True)
-
-        progress(0.8, desc="Building search index...")
-        index = faiss.IndexFlatL2(embeddings.shape[1])
-        index.add(np.array(embeddings).astype('float32'))
-
-        new_state = {
-            "index": index,
-            "chunks": chunks
-        }
-
-        progress(1, desc="Done!")
-        return (
-            new_state,
-            gr.Column(visible=False),
-            gr.Column(visible=True)
-        )
-
-    lets_chat_button.click(
-        fn=go_to_chat,
-        inputs=[app_state, chat_file_upload],
-        outputs=[app_state, welcome_page, chat_page]
-    )
-    submit_button.click(
-        fn=process_chat_request,
-        inputs=[user_input_box, chatbot_ui, app_state],
-        outputs=[user_input_box, chatbot_ui]
-    )
-    user_input_box.submit(
-        fn=process_chat_request,
-        inputs=[user_input_box, chatbot_ui, app_state],
-        outputs=[user_input_box, chatbot_ui]
-    )
 
 if __name__ == "__main__":
-    demo.launch()
 import gradio as gr
+import re
+from sentence_transformers import SentenceTransformer, util
+from transformers import T5ForConditionalGeneration, T5Tokenizer
+import torch
+
+# --- Model Loading ---
+# Load the sentence transformer model for creating embeddings
+embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+
+# Load the T5 model and tokenizer for question answering
+qa_model_name = 'google/flan-t5-base'
+qa_tokenizer = T5Tokenizer.from_pretrained(qa_model_name)
+qa_model = T5ForConditionalGeneration.from_pretrained(qa_model_name)
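+# (Both models are downloaded from the Hugging Face Hub on first run and cached locally.)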
+
+# --- Global Variables ---
+chat_history_embeddings = None
+chat_lines = []
+
+# --- Helper Functions ---
+def process_chat_file(file):
     """
+    Reads and parses the uploaded WhatsApp chat file.
     """
+    global chat_history_embeddings, chat_lines
+    if file is None:
+        return "Please upload a file first.", []
 
     try:
+        # Read the file content
+        with open(file.name, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        # Split the export into one chunk per message.
+        # This regex is a basic attempt and might need to be adjusted for different WhatsApp export formats.
+        # It splits on newlines that are followed by a bracketed date and time stamp.
+        lines = re.split(r'\n(?=\[\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{1,2}:\d{1,2}\])', content)
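+        # For reference, the pattern assumes export lines shaped like:
+        #   [12/31/23, 21:05:09] Alice: see you tomorrow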
 
+        # Filter out empty lines and system messages
+        chat_lines = [line.strip() for line in lines if line.strip() and ":" in line]
 
+        if not chat_lines:
+            return "Could not find any chat messages in the file. Please check the file format.", []
 
+        # Create embeddings for the chat history
+        chat_history_embeddings = embedding_model.encode(chat_lines, convert_to_tensor=True)
 
+        return "File processed successfully! You can now ask questions.", []
+    except Exception as e:
+        return f"An error occurred: {e}", []
 
+def get_bot_response(user_message, history, temperature):
+    """
+    Generates a response from the chatbot.
+    """
+    global chat_history_embeddings, chat_lines
 
+    if chat_history_embeddings is None:
+        # This handler clears the textbox and updates the chat, so it must
+        # return a (textbox, history) pair rather than a bare string.
+        history = history + [(user_message, "Please upload and process a chat file first.")]
+        return "", history
 
+    # 1. Find relevant context from the chat history
+    question_embedding = embedding_model.encode(user_message, convert_to_tensor=True)
+    cos_scores = util.pytorch_cos_sim(question_embedding, chat_history_embeddings)[0]
+
+    # Get the top 5 most similar chat lines
+    top_k = min(5, len(chat_lines))
+    top_results = torch.topk(cos_scores, k=top_k)
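+    # torch.topk returns a (values, indices) pair; both are walked in parallel below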
+
+    context = ""
+    for score, idx in zip(top_results[0], top_results[1]):
+        context += chat_lines[idx] + "\n"
+
+    # 2. Generate an answer using the T5 model
+    prompt = f"""
+    Answer the following question based on the provided chat history.
+    If the answer is not in the context, say "I couldn't find an answer to that in the chat history."
+
+    Chat History:
+    {context}
+
+    Question: {user_message}
+
+    Answer:
+    """
+
+    input_ids = qa_tokenizer.encode(prompt, return_tensors='pt')
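+    # Note: a long retrieved context can push the prompt past the model's input budget;
+    # passing truncation=True to encode() would be a safe guard here.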
+
+    # Generate the output (sampling must be enabled for temperature to have any effect)
+    output_ids = qa_model.generate(
+        input_ids,
+        max_length=150,
+        num_beams=4,
+        do_sample=True,
+        temperature=temperature,
+        early_stopping=True
+    )
 
+    answer = qa_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+    # Clear the input box and append the exchange to the chat history
+    return "", history + [(user_message, answer)]
+
+# --- Gradio UI ---
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="orange")) as demo:
+    gr.Markdown("# 💬 Chat with your WhatsApp History")
+    gr.Markdown("Upload your WhatsApp chat `.txt` file and ask questions about it!")
+
+    # Fun GIF
+    gr.HTML("""
+    <div style="text-align: center;">
+        <img src="https://media.giphy.com/media/…" alt="Chatbot GIF" style="width:300px; height:auto; border-radius: 15px;">
+    </div>
+    """)
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            file_upload = gr.File(label="Upload WhatsApp Chat (.txt)")
+            process_button = gr.Button("Process File")
+            upload_status = gr.Textbox(label="Status", interactive=False)
+
+            temperature_slider = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.1,
+                step=0.1,
+                label="Temperature",
+                info="Lower values are more accurate, higher values are more creative."
+            )
+
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(label="Chat")
+            msg = gr.Textbox(label="Your Question")
+            clear = gr.ClearButton([msg, chatbot])
+
+    # --- Event Handlers ---
+    file_upload.upload(process_chat_file, inputs=[file_upload], outputs=[upload_status, chatbot])
+    process_button.click(process_chat_file, inputs=[file_upload], outputs=[upload_status, chatbot])
+    msg.submit(get_bot_response, [msg, chatbot, temperature_slider], [msg, chatbot])
 
 if __name__ == "__main__":
+    demo.launch(debug=True)
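
For reference, a minimal smoke test of the new pipeline outside the Gradio UI. This is a sketch: the file names smoke_test.py and chat.txt are hypothetical, and it assumes the app.py above is on the import path.

# smoke_test.py (hypothetical)
from types import SimpleNamespace

import app  # loads both models on import

# process_chat_file only needs an object with a .name attribute
status, _ = app.process_chat_file(SimpleNamespace(name="chat.txt"))
print(status)

# get_bot_response returns ("", updated_history); the answer is the last bot turn
_, history = app.get_bot_response("When did we plan to meet?", [], temperature=0.1)
print(history[-1][1])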