Spaces:

Srikesh
/

pdf_chat

Sleeping

App Files Files Community

Srikesh committed about 1 month ago

Commit

dd765b2

verified ·

1 Parent(s): ec0c183

Update app.py

Browse files

Files changed (1) hide show

app.py +178 -62

app.py CHANGED Viewed

@@ -1,9 +1,13 @@
 import gradio as gr
 from sentence_transformers import SentenceTransformer
 import numpy as np
 from pypdf import PdfReader
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 # Global variables
 chunks = []
@@ -11,30 +15,72 @@ embeddings = []
 model = None
 tokenizer = None
 embed_model = None
 def initialize_models():
-    """Initialize models on startup"""
     global model, tokenizer, embed_model
     print("Loading models...")
-    # Load embedding model
-    embed_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-    # Load language model
-    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float32,
-        low_cpu_mem_usage=True
     )
     print("Models loaded successfully!")
 def process_pdf(pdf_file):
-    """Process PDF and create embeddings"""
-    global chunks, embeddings, embed_model
     if pdf_file is None:
         return "❌ Please upload a PDF file!", None
@@ -49,37 +95,44 @@ def process_pdf(pdf_file):
         if not text.strip():
             return "❌ Could not extract text from PDF!", None
-        # Split into chunks
-        chunk_size = 1000
-        overlap = 200
-        chunks = []
-        for i in range(0, len(text), chunk_size - overlap):
-            chunk = text[i:i + chunk_size]
-            if chunk.strip():
-                chunks.append(chunk)
-        # Create embeddings
-        embeddings = embed_model.encode(chunks, show_progress_bar=False)
         return f"✅ PDF processed! Created {len(chunks)} chunks. You can now ask questions!", None
     except Exception as e:
         return f"❌ Error: {str(e)}", None
-def find_relevant_chunks(query, top_k=3):
-    """Find most relevant chunks using cosine similarity"""
     global chunks, embeddings, embed_model
-    if not chunks:
         return []
-    query_embedding = embed_model.encode([query])[0]
-    # Calculate cosine similarity
-    similarities = np.dot(embeddings, query_embedding) / (
-        np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_embedding)
-    )
     # Get top k indices
     top_indices = np.argsort(similarities)[-top_k:][::-1]
@@ -87,42 +140,49 @@ def find_relevant_chunks(query, top_k=3):
     return [chunks[i] for i in top_indices]
 def generate_response(question, context):
-    """Generate response using the language model"""
     global model, tokenizer
-    prompt = f"""<|system|>
-You are a helpful assistant. Answer the question based on the provided context. Be concise and accurate.
-</s>
-<|user|>
-Context: {context}
 Question: {question}
-</s>
-<|assistant|>
-"""
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=300,
             temperature=0.7,
             top_p=0.9,
             do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
         )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Extract only the assistant's response
-    if "<|assistant|>" in response:
-        response = response.split("<|assistant|>")[-1].strip()
     return response
 def chat(message, history):
-    """Handle chat"""
     global chunks
     if not chunks:
@@ -132,48 +192,103 @@ def chat(message, history):
         return history
     try:
-        # Find relevant context
-        relevant_chunks = find_relevant_chunks(message)
-        context = "\n\n".join(relevant_chunks)
         # Generate response
         response = generate_response(message, context)
         return history + [[message, response]]
     except Exception as e:
         return history + [[message, f"❌ Error: {str(e)}"]]
 def clear_all():
     """Clear everything"""
-    global chunks, embeddings
     chunks = []
     embeddings = []
     return None, "Ready to process a new PDF"
-# Create UI
-with gr.Blocks(title="Chat with PDF") as demo:
-    gr.Markdown("# 📄 Chat with PDF - Simple Version")
     with gr.Row():
         with gr.Column(scale=1):
-            pdf_input = gr.File(label="📎 Upload PDF", file_types=[".pdf"])
-            process_btn = gr.Button("🔄 Process PDF", variant="primary")
-            status = gr.Textbox(label="Status", lines=3)
-            clear_all_btn = gr.Button("🗑️ Clear All")
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="💬 Chat", height=400)
-            msg = gr.Textbox(label="Question", placeholder="Ask about the PDF...")
             with gr.Row():
-                send_btn = gr.Button("Send", variant="primary")
                 clear_btn = gr.Button("Clear Chat")
     # Events
-    process_btn.click(process_pdf, [pdf_input], [status, chatbot])
-    msg.submit(chat, [msg, chatbot], [chatbot]).then(lambda: "", None, [msg])
-    send_btn.click(chat, [msg, chatbot], [chatbot]).then(lambda: "", None, [msg])
     clear_btn.click(lambda: None, None, [chatbot])
     clear_all_btn.click(clear_all, None, [chatbot, status])
@@ -182,4 +297,5 @@ with gr.Blocks(title="Chat with PDF") as demo:
 initialize_models()
 if __name__ == "__main__":
-    demo.launch()

+import os
+os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '0'
 import gradio as gr
 from sentence_transformers import SentenceTransformer
 import numpy as np
 from pypdf import PdfReader
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import re
 # Global variables
 chunks = []
 model = None
 tokenizer = None
 embed_model = None
+text_cache = ""
 def initialize_models():
+    """Initialize models on startup with optimizations"""
     global model, tokenizer, embed_model
     print("Loading models...")
+    # Use smaller, faster embedding model
+    embed_model = SentenceTransformer(
+        'sentence-transformers/paraphrase-MiniLM-L3-v2',  # Faster, smaller model
+        device='cpu'
+    )
+    # Use smaller, faster language model
+    model_name = "microsoft/phi-1_5"  # Much faster than TinyLlama, better quality
+    # Alternative: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_name,
+        trust_remote_code=True
+    )
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float32,
+        low_cpu_mem_usage=True,
+        trust_remote_code=True
     )
+    # Set padding token
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
     print("Models loaded successfully!")
def _overlap_tail(chunk, overlap):
    """Return the trailing whole words of *chunk* that fit in *overlap* characters."""
    if overlap <= 0:
        return ""
    picked = []
    length = 0
    for word in reversed(chunk.split()):
        # Every word after the first also costs one joining space.
        extra = len(word) + (1 if picked else 0)
        if length + extra > overlap:
            break
        picked.append(word)
        length += extra
    return " ".join(reversed(picked))


def smart_chunk_text(text, chunk_size=500, overlap=100):
    """Split *text* into chunks that end on sentence boundaries.

    Sentences are detected as runs ending in ``.``, ``!`` or ``?`` followed by
    whitespace, so the terminating punctuation is kept and numbers such as
    ``3.14`` are not broken apart. Sentences are packed greedily into a chunk
    until adding the next one would push it past ``chunk_size`` characters.

    Args:
        text: The text to split.
        chunk_size: Target maximum chunk length in characters. A single
            sentence longer than this is kept whole, and the carried-over
            overlap may push a chunk slightly past the target.
        overlap: Maximum number of characters (whole words only) copied from
            the end of one chunk to the start of the next.

    Returns:
        A list of stripped, non-empty chunk strings; empty for blank input.
    """
    if not text or not text.strip():
        return []

    pieces = (piece.strip() for piece in re.split(r'(?<=[.!?])\s+', text))
    sentences = [piece for piece in pieces if piece]

    chunks = []
    current = ""
    for sentence in sentences:
        candidate = f"{current} {sentence}" if current else sentence
        if current and len(candidate) > chunk_size:
            chunks.append(current)
            # Seed the next chunk with the tail of the one just closed.
            tail = _overlap_tail(current, overlap)
            current = f"{tail} {sentence}" if tail else sentence
        else:
            current = candidate

    if current:
        chunks.append(current)
    return chunks
 def process_pdf(pdf_file):
+    """Process PDF and create embeddings - OPTIMIZED"""
+    global chunks, embeddings, embed_model, text_cache
     if pdf_file is None:
         return "❌ Please upload a PDF file!", None
         if not text.strip():
             return "❌ Could not extract text from PDF!", None
+        text_cache = text  # Cache for faster reprocessing
+        # Smart chunking (smaller chunks = faster embedding)
+        chunks = smart_chunk_text(text, chunk_size=500, overlap=100)
+        # Batch encode for speed
+        print(f"Creating embeddings for {len(chunks)} chunks...")
+        embeddings = embed_model.encode(
+            chunks,
+            batch_size=32,  # Process multiple chunks at once
+            show_progress_bar=False,
+            convert_to_numpy=True
+        )
         return f"✅ PDF processed! Created {len(chunks)} chunks. You can now ask questions!", None
     except Exception as e:
+        print(f"Error processing PDF: {str(e)}")
         return f"❌ Error: {str(e)}", None
def find_relevant_chunks(query, top_k=2):
    """Return the stored chunks most similar to *query*, best match first.

    Reads the module-level ``chunks``, ``embeddings`` and ``embed_model``.
    The query is embedded with ``embed_model`` and compared to every row of
    ``embeddings`` by cosine similarity.
    NOTE(review): assumes row ``i`` of ``embeddings`` belongs to ``chunks[i]``
    -- confirm against the code that fills both.

    Args:
        query: The question text to embed.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks ordered from most to least similar, or an empty
        list when ``top_k`` is not positive or nothing has been indexed.
    """
    # A non-positive top_k must yield nothing: slicing with [-0:] would
    # otherwise return every chunk.
    if top_k <= 0 or not chunks or len(embeddings) == 0:
        return []

    query_embedding = embed_model.encode(
        [query],
        convert_to_numpy=True,
        show_progress_bar=False,
    )[0]

    matrix = np.asarray(embeddings)
    dots = matrix @ query_embedding
    scale = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query_embedding)

    # A zero-length vector has no direction; dividing by it would give NaN,
    # which argsort places last and would therefore rank as the best match.
    # Score such rows -inf so they rank last instead.
    valid = scale > 0
    similarities = np.where(valid, dots / np.where(valid, scale, 1.0), -np.inf)

    # Get top k indices
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return [chunks[i] for i in top_indices]
 def generate_response(question, context):
+    """Generate response - OPTIMIZED"""
     global model, tokenizer
+    # Shorter, more efficient prompt
+    prompt = f"""Context: {context[:800]}
 Question: {question}
+Answer:"""
+    inputs = tokenizer(
+        prompt,
+        return_tensors="pt",
+        truncation=True,
+        max_length=1024  # Reduced from 2048
+    )
+    # Faster generation settings
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
+            max_new_tokens=150,  # Reduced from 300
             temperature=0.7,
             top_p=0.9,
             do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
+            num_beams=1,  # Greedy search for speed
+            early_stopping=True
         )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Extract answer
+    if "Answer:" in response:
+        response = response.split("Answer:")[-1].strip()
+    # Clean up response
+    response = response.split("\n")[0].strip()  # Take first line
     return response
 def chat(message, history):
+    """Handle chat - OPTIMIZED"""
     global chunks
     if not chunks:
         return history
     try:
+        # Find relevant context (reduced chunks)
+        relevant_chunks = find_relevant_chunks(message, top_k=2)
+        context = " ".join(relevant_chunks)
         # Generate response
         response = generate_response(message, context)
+        # Ensure response is not empty
+        if not response or len(response) < 10:
+            response = "I found relevant information but couldn't generate a clear answer. Please try rephrasing your question."
         return history + [[message, response]]
     except Exception as e:
+        print(f"Error in chat: {str(e)}")
         return history + [[message, f"❌ Error: {str(e)}"]]
 def clear_all():
     """Clear everything"""
+    global chunks, embeddings, text_cache
     chunks = []
     embeddings = []
+    text_cache = ""
     return None, "Ready to process a new PDF"
# Build the Gradio interface: controls on the left, chat on the right.
with gr.Blocks(title="Chat with PDF - Fast", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚡ Chat with PDF - Optimized Fast Version")
    gr.Markdown("*Using lightweight models for faster responses*")

    with gr.Row():
        # Left column: upload, processing trigger, status and reset.
        with gr.Column(scale=1):
            pdf_input = gr.File(label="📎 Upload PDF", file_types=[".pdf"])
            process_btn = gr.Button("🔄 Process PDF", variant="primary", size="lg")
            status = gr.Textbox(label="Status", lines=2, interactive=False)
            gr.Markdown("### Tips:")
            gr.Markdown("""
            - Processing is much faster now!
            - Ask specific questions
            - Keep questions concise
            """)
            clear_all_btn = gr.Button("🗑️ Clear All", variant="stop")

        # Right column: conversation and question input.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="💬 Chat", height=450, bubble_full_width=False)
            msg = gr.Textbox(
                label="Question",
                placeholder="Ask a question about the PDF...",
                lines=2,
            )
            with gr.Row():
                send_btn = gr.Button("📤 Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")

    # Events
    process_btn.click(process_pdf, inputs=[pdf_input], outputs=[status, chatbot])

    # Pressing Enter in the textbox and clicking Send do the same thing:
    # run chat, then empty the question box.
    for ask in (msg.submit, send_btn.click):
        ask(chat, inputs=[msg, chatbot], outputs=[chatbot]).then(lambda: "", None, [msg])

    clear_btn.click(lambda: None, None, [chatbot])
    clear_all_btn.click(clear_all, None, [chatbot, status])
 initialize_models()
 if __name__ == "__main__":
+    demo.queue()  # Enable queuing for better performance
+    demo.launch(share=False)