Spaces:

husseinelsaadi
/

Codingo

Paused

App Files Files Community

husseinelsaadi committed 19 days ago

Commit

ab83281

1 Parent(s): 0c4a8eb

chatbot updated

Browse files

Files changed (1) hide show

chatbot/chatbot.py +205 -151

chatbot/chatbot.py CHANGED Viewed

@@ -1,15 +1,15 @@
 # codingo/chatbot/chatbot.py
-"""Chatbot module for Codingo …
-Default model changed to blenderbot-400M-distill; generation uses max_new_tokens; fallback between causal and seq2seq models."""
 import os
 import shutil
 from typing import List
 os.environ.setdefault("HF_HOME", "/tmp/huggingface")
 os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
 os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
 _hf_model = None
 _hf_tokenizer = None
@@ -20,7 +20,8 @@ _current_dir = os.path.dirname(os.path.abspath(__file__))
 _knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
 _chroma_db_dir = "/tmp/chroma_db"
-DEFAULT_MODEL_NAME = "facebook/blenderbot-400M-distill"
 def _init_hf_model() -> None:
     from transformers import (
@@ -34,206 +35,259 @@ def _init_hf_model() -> None:
     if _hf_model is not None and _hf_tokenizer is not None:
         return
     model_name = os.getenv("HF_CHATBOT_MODEL", DEFAULT_MODEL_NAME)
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # Initialize tokenizer with proper configuration
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    # Try loading the model with proper error handling
     try:
-        model = AutoModelForCausalLM.from_pretrained(model_name)
-        model_type = "causal"
-    except Exception:
         try:
-            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-            model_type = "seq2seq"
         except Exception as e:
-            print(f"Error loading model: {e}")
-            raise
-    # Move model to device
-    model = model.to(device)
-    model.eval()  # Set to evaluation mode
-    # Ensure proper padding token configuration
-    if tokenizer.pad_token is None:
-        if tokenizer.eos_token is not None:
             tokenizer.pad_token = tokenizer.eos_token
-        else:
-            tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-            model.resize_token_embeddings(len(tokenizer))
-    # Store model type for later use
-    model.model_type = model_type
-    _hf_model = model
-    _hf_tokenizer = tokenizer
 def _init_vector_store() -> None:
     global _chatbot_embedder, _chatbot_collection
     if _chatbot_embedder is not None and _chatbot_collection is not None:
         return
-    from langchain.text_splitter import RecursiveCharacterTextSplitter
-    from sentence_transformers import SentenceTransformer
-    import chromadb
-    from chromadb.config import Settings
-    # Clean up old database
-    shutil.rmtree(_chroma_db_dir, ignore_errors=True)
-    os.makedirs(_chroma_db_dir, exist_ok=True)
     try:
-        with open(_knowledge_base_path, encoding="utf-8") as f:
-            raw_text = f.read()
-    except FileNotFoundError:
-        raw_text = (
-            "Codingo is an AI-powered recruitment platform designed to "
-            "streamline job applications, candidate screening, and hiring. "
-            "We make hiring smarter, faster, and fairer through automation "
-            "and intelligent recommendations."
-        )
-    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
-    docs: List[str] = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
-    # Initialize embedder
-    embedder = SentenceTransformer("all-MiniLM-L6-v2")
-    embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
-    # Initialize ChromaDB
-    client = chromadb.Client(Settings(
-        persist_directory=_chroma_db_dir,
-        anonymized_telemetry=False,
-        is_persistent=True,
-    ))
-    # Create or recreate collection
-    try:
-        client.delete_collection("chatbot")
-    except:
-        pass
-    collection = client.create_collection("chatbot")
-    # Add documents
-    ids = [f"doc_{i}" for i in range(len(docs))]
-    collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
-    _chatbot_embedder = embedder
-    _chatbot_collection = collection
 def get_chatbot_response(query: str) -> str:
     try:
         if not query or not query.strip():
             return "Please type a question about the Codingo platform."
-        # Clear GPU cache before processing
         import torch
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-        _init_vector_store()
-        _init_hf_model()
         embedder = _chatbot_embedder
         collection = _chatbot_collection
         model = _hf_model
         tokenizer = _hf_tokenizer
-        import torch
         # Get relevant documents
         query_embedding = embedder.encode([query])[0]
         results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
         retrieved_docs = results.get("documents", [[]])[0] if results else []
-        context = "\n".join(retrieved_docs[:3])
-        # Prepare the prompt based on model type
         if hasattr(model, 'model_type') and model.model_type == "seq2seq":
-            # For seq2seq models like BlenderBot
             prompt = f"Context: {context}\n\nUser: {query}\nAssistant:"
         else:
-            # For causal models
-            system_instruction = (
-                "You are LUNA AI, a helpful assistant for the Codingo recruitment "
-                "platform. Use the provided context to answer questions about "
-                "Codingo. If the question is not related to Codingo, politely "
-                "redirect the conversation. Keep responses concise and friendly."
             )
-            prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser: {query}\nLUNA AI:"
-        # Tokenize with proper handling
-        inputs = tokenizer(
-            prompt,
-            return_tensors="pt",
-            truncation=True,
-            max_length=512,
-            padding=True,
-            return_attention_mask=True
-        )
-        # Move all tensors to the same device
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
-        # Generate response with error handling
         with torch.no_grad():
             try:
-                # Use different generation parameters based on model type
-                if hasattr(model, 'model_type') and model.model_type == "seq2seq":
-                    output_ids = model.generate(
-                        input_ids=inputs['input_ids'],
-                        attention_mask=inputs['attention_mask'],
-                        max_new_tokens=150,
-                        min_length=10,
-                        num_beams=3,
-                        do_sample=True,
-                        temperature=0.7,
-                        top_p=0.9,
-                        pad_token_id=tokenizer.pad_token_id,
-                        eos_token_id=tokenizer.eos_token_id,
-                        early_stopping=True,
-                    )
-                else:
                     output_ids = model.generate(
                         input_ids=inputs['input_ids'],
-                        attention_mask=inputs['attention_mask'],
-                        max_new_tokens=150,
-                        num_beams=3,
-                        do_sample=True,
-                        temperature=0.7,
                         pad_token_id=tokenizer.pad_token_id,
-                        eos_token_id=tokenizer.eos_token_id,
                     )
-            except Exception as e:
-                print(f"Generation error: {e}")
-                # Fallback to a simple response
-                return "I'm here to help you with questions about the Codingo platform. Could you please rephrase your question?"
-        # Decode the response
         response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-        # Clean up the response
-        if "Assistant:" in response:
-            response = response.split("Assistant:")[-1].strip()
-        elif "LUNA AI:" in response:
             response = response.split("LUNA AI:")[-1].strip()
-        elif prompt in response:
-            response = response.replace(prompt, "").strip()
-        # Remove the input prompt if it's still in the response
         if query in response:
-            response = response.split(query)[-1].strip()
-        return (
-            response
-            if response and len(response) > 5
-            else "I'm here to help you with questions about the Codingo platform. What would you like to know?"
-        )
     except Exception as e:
-        print(f"Chatbot error: {e}")
-        import traceback
         traceback.print_exc()
-        return "I apologize, but I'm having trouble processing your request. Please try again with a different question about Codingo."

 # codingo/chatbot/chatbot.py
+"""Chatbot module for Codingo with enhanced debugging"""
 import os
 import shutil
 from typing import List
+import traceback
 os.environ.setdefault("HF_HOME", "/tmp/huggingface")
 os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
 os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
+os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Enable synchronous CUDA errors
 _hf_model = None
 _hf_tokenizer = None
 _knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
 _chroma_db_dir = "/tmp/chroma_db"
+# Try a smaller, more reliable model for debugging
+DEFAULT_MODEL_NAME = "microsoft/DialoGPT-small"
 def _init_hf_model() -> None:
     from transformers import (
     if _hf_model is not None and _hf_tokenizer is not None:
         return
+    print("Initializing HF model...")
     model_name = os.getenv("HF_CHATBOT_MODEL", DEFAULT_MODEL_NAME)
+    print(f"Loading model: {model_name}")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
     try:
+        # Initialize tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        print("Tokenizer loaded successfully")
+        # Try loading the model
         try:
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                low_cpu_mem_usage=True
+            )
+            model_type = "causal"
+            print("Loaded as causal model")
         except Exception as e:
+            print(f"Failed to load as causal model: {e}")
+            model = AutoModelForSeq2SeqLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                low_cpu_mem_usage=True
+            )
+            model_type = "seq2seq"
+            print("Loaded as seq2seq model")
+        # Move model to device
+        model = model.to(device)
+        model.eval()
+        print("Model moved to device and set to eval mode")
+        # Configure padding token
+        if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
+            print(f"Set pad_token to: {tokenizer.pad_token}")
+        # Store model type
+        model.model_type = model_type
+        _hf_model = model
+        _hf_tokenizer = tokenizer
+        print("Model initialization complete")
+    except Exception as e:
+        print(f"Error during model initialization: {e}")
+        traceback.print_exc()
+        raise
 def _init_vector_store() -> None:
     global _chatbot_embedder, _chatbot_collection
     if _chatbot_embedder is not None and _chatbot_collection is not None:
         return
+    print("Initializing vector store...")
     try:
+        from langchain.text_splitter import RecursiveCharacterTextSplitter
+        from sentence_transformers import SentenceTransformer
+        import chromadb
+        from chromadb.config import Settings
+        # Clean up old database
+        shutil.rmtree(_chroma_db_dir, ignore_errors=True)
+        os.makedirs(_chroma_db_dir, exist_ok=True)
+        # Load knowledge base
+        try:
+            with open(_knowledge_base_path, encoding="utf-8") as f:
+                raw_text = f.read()
+                print(f"Loaded knowledge base with {len(raw_text)} characters")
+        except FileNotFoundError:
+            print("Knowledge base file not found, using default text")
+            raw_text = (
+                "Codingo is an AI-powered recruitment platform designed to "
+                "streamline job applications, candidate screening, and hiring. "
+                "We make hiring smarter, faster, and fairer through automation "
+                "and intelligent recommendations."
+            )
+        # Split text
+        splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
+        docs = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
+        print(f"Split into {len(docs)} documents")
+        # Initialize embedder
+        print("Loading sentence transformer...")
+        embedder = SentenceTransformer("all-MiniLM-L6-v2")
+        print("Encoding documents...")
+        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
+        print(f"Created {len(embeddings)} embeddings")
+        # Initialize ChromaDB (use in-memory for HF Spaces)
+        print("Initializing ChromaDB...")
+        client = chromadb.Client(Settings(
+            anonymized_telemetry=False,
+            is_persistent=False,  # Changed to False for HF Spaces
+        ))
+        # Create collection
+        try:
+            client.delete_collection("chatbot")
+        except:
+            pass
+        collection = client.create_collection("chatbot")
+        # Add documents
+        ids = [f"doc_{i}" for i in range(len(docs))]
+        collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
+        print(f"Added {len(docs)} documents to collection")
+        _chatbot_embedder = embedder
+        _chatbot_collection = collection
+        print("Vector store initialization complete")
+    except Exception as e:
+        print(f"Error during vector store initialization: {e}")
+        traceback.print_exc()
+        raise
 def get_chatbot_response(query: str) -> str:
     try:
+        print(f"\n=== Processing query: {query} ===")
         if not query or not query.strip():
             return "Please type a question about the Codingo platform."
+        # Clear GPU cache
         import torch
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
+            print("Cleared GPU cache")
+        # Initialize components
+        try:
+            _init_vector_store()
+        except Exception as e:
+            print(f"Vector store initialization failed: {e}")
+            return "I'm having trouble accessing my knowledge base. Please try again later."
+        try:
+            _init_hf_model()
+        except Exception as e:
+            print(f"Model initialization failed: {e}")
+            return "I'm having trouble loading my language model. Please try again later."
         embedder = _chatbot_embedder
         collection = _chatbot_collection
         model = _hf_model
         tokenizer = _hf_tokenizer
         # Get relevant documents
+        print("Creating query embedding...")
         query_embedding = embedder.encode([query])[0]
+        print("Searching for relevant documents...")
         results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
         retrieved_docs = results.get("documents", [[]])[0] if results else []
+        context = "\n".join(retrieved_docs[:3]) if retrieved_docs else ""
+        print(f"Retrieved {len(retrieved_docs)} documents")
+        # Prepare prompt
         if hasattr(model, 'model_type') and model.model_type == "seq2seq":
             prompt = f"Context: {context}\n\nUser: {query}\nAssistant:"
         else:
+            # For DialoGPT or other causal models
+            prompt = f"Context: {context}\n\nUser: {query}\nLUNA AI:"
+        print(f"Prompt length: {len(prompt)} characters")
+        # Tokenize
+        print("Tokenizing input...")
+        try:
+            inputs = tokenizer(
+                prompt,
+                return_tensors="pt",
+                truncation=True,
+                max_length=400,  # Reduced for safety
+                padding=True,
+                return_attention_mask=True
             )
+            print(f"Input shape: {inputs['input_ids'].shape}")
+        except Exception as e:
+            print(f"Tokenization error: {e}")
+            traceback.print_exc()
+            return "I had trouble processing your input. Please try a shorter question."
+        # Move to device
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
+        # Generate response
+        print("Generating response...")
         with torch.no_grad():
             try:
+                output_ids = model.generate(
+                    input_ids=inputs['input_ids'],
+                    attention_mask=inputs['attention_mask'],
+                    max_new_tokens=100,  # Reduced for safety
+                    min_length=10,
+                    num_beams=2,  # Reduced for memory
+                    do_sample=True,
+                    temperature=0.8,
+                    pad_token_id=tokenizer.pad_token_id,
+                    eos_token_id=tokenizer.eos_token_id,
+                    early_stopping=True,
+                )
+                print(f"Output shape: {output_ids.shape}")
+            except Exception as e:
+                print(f"Generation error: {e}")
+                traceback.print_exc()
+                # Try a simpler generation
+                try:
+                    print("Trying simpler generation...")
                     output_ids = model.generate(
                         input_ids=inputs['input_ids'],
+                        max_new_tokens=50,
                         pad_token_id=tokenizer.pad_token_id,
                     )
+                except Exception as e2:
+                    print(f"Simple generation also failed: {e2}")
+                    return "I'm having trouble generating a response. Please try again."
+        # Decode response
+        print("Decoding response...")
         response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        print(f"Raw response: {response[:100]}...")
+        # Clean up response
+        if "LUNA AI:" in response:
             response = response.split("LUNA AI:")[-1].strip()
+        elif "Assistant:" in response:
+            response = response.split("Assistant:")[-1].strip()
+        # Remove the input if it's in the response
         if query in response:
+            response = response.replace(query, "").strip()
+        # Final cleanup
+        response = response.strip()
+        if not response or len(response) < 5:
+            response = "I'm here to help you with questions about the Codingo platform. What would you like to know?"
+        print(f"Final response: {response}")
+        return response
     except Exception as e:
+        print(f"Unexpected error in get_chatbot_response: {e}")
         traceback.print_exc()
+        return "I apologize, but I encountered an unexpected error. Please try again with a different question."