husseinelsaadi committed on
Commit ab83281 · 1 Parent(s): 0c4a8eb

chatbot updated

Files changed (1)
  1. chatbot/chatbot.py +205 -151
chatbot/chatbot.py CHANGED
@@ -1,15 +1,15 @@
 # codingo/chatbot/chatbot.py
-"""Chatbot module for Codingo
-
-Default model changed to blenderbot-400M-distill; generation uses max_new_tokens; fallback between causal and seq2seq models."""
 
 import os
 import shutil
 from typing import List
 
 os.environ.setdefault("HF_HOME", "/tmp/huggingface")
 os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
 os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
 
 _hf_model = None
 _hf_tokenizer = None
@@ -20,7 +20,8 @@ _current_dir = os.path.dirname(os.path.abspath(__file__))
 _knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
 _chroma_db_dir = "/tmp/chroma_db"
 
-DEFAULT_MODEL_NAME = "facebook/blenderbot-400M-distill"
 
 def _init_hf_model() -> None:
     from transformers import (
@@ -34,206 +35,259 @@ def _init_hf_model() -> None:
     if _hf_model is not None and _hf_tokenizer is not None:
         return
 
     model_name = os.getenv("HF_CHATBOT_MODEL", DEFAULT_MODEL_NAME)
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-    # Initialize tokenizer with proper configuration
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-    # Try loading the model with proper error handling
     try:
-        model = AutoModelForCausalLM.from_pretrained(model_name)
-        model_type = "causal"
-    except Exception:
         try:
-            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-            model_type = "seq2seq"
         except Exception as e:
-            print(f"Error loading model: {e}")
-            raise
-
-    # Move model to device
-    model = model.to(device)
-    model.eval()  # Set to evaluation mode
-
-    # Ensure proper padding token configuration
-    if tokenizer.pad_token is None:
-        if tokenizer.eos_token is not None:
             tokenizer.pad_token = tokenizer.eos_token
-        else:
-            tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-            model.resize_token_embeddings(len(tokenizer))
-
-    # Store model type for later use
-    model.model_type = model_type
-
-    _hf_model = model
-    _hf_tokenizer = tokenizer
 
 def _init_vector_store() -> None:
     global _chatbot_embedder, _chatbot_collection
     if _chatbot_embedder is not None and _chatbot_collection is not None:
         return
 
-    from langchain.text_splitter import RecursiveCharacterTextSplitter
-    from sentence_transformers import SentenceTransformer
-    import chromadb
-    from chromadb.config import Settings
-
-    # Clean up old database
-    shutil.rmtree(_chroma_db_dir, ignore_errors=True)
-    os.makedirs(_chroma_db_dir, exist_ok=True)
 
     try:
-        with open(_knowledge_base_path, encoding="utf-8") as f:
-            raw_text = f.read()
-    except FileNotFoundError:
-        raw_text = (
-            "Codingo is an AI-powered recruitment platform designed to "
-            "streamline job applications, candidate screening, and hiring. "
-            "We make hiring smarter, faster, and fairer through automation "
-            "and intelligent recommendations."
-        )
-
-    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
-    docs: List[str] = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
-
-    # Initialize embedder
-    embedder = SentenceTransformer("all-MiniLM-L6-v2")
-    embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
-
-    # Initialize ChromaDB
-    client = chromadb.Client(Settings(
-        persist_directory=_chroma_db_dir,
-        anonymized_telemetry=False,
-        is_persistent=True,
-    ))
-
-    # Create or recreate collection
-    try:
-        client.delete_collection("chatbot")
-    except:
-        pass
-
-    collection = client.create_collection("chatbot")
-
-    # Add documents
-    ids = [f"doc_{i}" for i in range(len(docs))]
-    collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
 
-    _chatbot_embedder = embedder
-    _chatbot_collection = collection
 
 def get_chatbot_response(query: str) -> str:
     try:
         if not query or not query.strip():
             return "Please type a question about the Codingo platform."
 
-        # Clear GPU cache before processing
         import torch
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 
-        _init_vector_store()
-        _init_hf_model()
 
         embedder = _chatbot_embedder
         collection = _chatbot_collection
         model = _hf_model
         tokenizer = _hf_tokenizer
 
-        import torch
-
         # Get relevant documents
         query_embedding = embedder.encode([query])[0]
         results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
         retrieved_docs = results.get("documents", [[]])[0] if results else []
-        context = "\n".join(retrieved_docs[:3])
-
-        # Prepare the prompt based on model type
         if hasattr(model, 'model_type') and model.model_type == "seq2seq":
-            # For seq2seq models like BlenderBot
             prompt = f"Context: {context}\n\nUser: {query}\nAssistant:"
         else:
-            # For causal models
-            system_instruction = (
-                "You are LUNA AI, a helpful assistant for the Codingo recruitment "
-                "platform. Use the provided context to answer questions about "
-                "Codingo. If the question is not related to Codingo, politely "
-                "redirect the conversation. Keep responses concise and friendly."
             )
-            prompt = f"{system_instruction}\n\nContext:\n{context}\n\nUser: {query}\nLUNA AI:"
-
-        # Tokenize with proper handling
-        inputs = tokenizer(
-            prompt,
-            return_tensors="pt",
-            truncation=True,
-            max_length=512,
-            padding=True,
-            return_attention_mask=True
-        )
-
-        # Move all tensors to the same device
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
-
-        # Generate response with error handling
         with torch.no_grad():
             try:
-                # Use different generation parameters based on model type
-                if hasattr(model, 'model_type') and model.model_type == "seq2seq":
-                    output_ids = model.generate(
-                        input_ids=inputs['input_ids'],
-                        attention_mask=inputs['attention_mask'],
-                        max_new_tokens=150,
-                        min_length=10,
-                        num_beams=3,
-                        do_sample=True,
-                        temperature=0.7,
-                        top_p=0.9,
-                        pad_token_id=tokenizer.pad_token_id,
-                        eos_token_id=tokenizer.eos_token_id,
-                        early_stopping=True,
-                    )
-                else:
                     output_ids = model.generate(
                         input_ids=inputs['input_ids'],
-                        attention_mask=inputs['attention_mask'],
-                        max_new_tokens=150,
-                        num_beams=3,
-                        do_sample=True,
-                        temperature=0.7,
                         pad_token_id=tokenizer.pad_token_id,
-                        eos_token_id=tokenizer.eos_token_id,
                     )
-            except Exception as e:
-                print(f"Generation error: {e}")
-                # Fallback to a simple response
-                return "I'm here to help you with questions about the Codingo platform. Could you please rephrase your question?"
 
-        # Decode the response
         response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-        # Clean up the response
-        if "Assistant:" in response:
-            response = response.split("Assistant:")[-1].strip()
-        elif "LUNA AI:" in response:
             response = response.split("LUNA AI:")[-1].strip()
-        elif prompt in response:
-            response = response.replace(prompt, "").strip()
 
-        # Remove the input prompt if it's still in the response
         if query in response:
-            response = response.split(query)[-1].strip()
-
-        return (
-            response
-            if response and len(response) > 5
-            else "I'm here to help you with questions about the Codingo platform. What would you like to know?"
-        )
 
     except Exception as e:
-        print(f"Chatbot error: {e}")
-        import traceback
         traceback.print_exc()
-        return "I apologize, but I'm having trouble processing your request. Please try again with a different question about Codingo."
 
 # codingo/chatbot/chatbot.py
+"""Chatbot module for Codingo with enhanced debugging"""
 
 import os
 import shutil
 from typing import List
+import traceback
 
 os.environ.setdefault("HF_HOME", "/tmp/huggingface")
 os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
 os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
+os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Enable synchronous CUDA errors
 
 _hf_model = None
 _hf_tokenizer = None
 
 _knowledge_base_path = os.path.join(_current_dir, "chatbot.txt")
 _chroma_db_dir = "/tmp/chroma_db"
 
+# Try a smaller, more reliable model for debugging
+DEFAULT_MODEL_NAME = "microsoft/DialoGPT-small"
 
 def _init_hf_model() -> None:
     from transformers import (
 
     if _hf_model is not None and _hf_tokenizer is not None:
         return
 
+    print("Initializing HF model...")
     model_name = os.getenv("HF_CHATBOT_MODEL", DEFAULT_MODEL_NAME)
+    print(f"Loading model: {model_name}")
 
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
 
     try:
+        # Initialize tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        print("Tokenizer loaded successfully")
+
+        # Try loading the model
         try:
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                low_cpu_mem_usage=True
+            )
+            model_type = "causal"
+            print("Loaded as causal model")
         except Exception as e:
+            print(f"Failed to load as causal model: {e}")
+            model = AutoModelForSeq2SeqLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                low_cpu_mem_usage=True
+            )
+            model_type = "seq2seq"
+            print("Loaded as seq2seq model")
+
+        # Move model to device
+        model = model.to(device)
+        model.eval()
+        print("Model moved to device and set to eval mode")
+
+        # Configure padding token
+        if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
+            print(f"Set pad_token to: {tokenizer.pad_token}")
+
+        # Store model type
+        model.model_type = model_type
+
+        _hf_model = model
+        _hf_tokenizer = tokenizer
+        print("Model initialization complete")
+
+    except Exception as e:
+        print(f"Error during model initialization: {e}")
+        traceback.print_exc()
+        raise
 
 def _init_vector_store() -> None:
     global _chatbot_embedder, _chatbot_collection
     if _chatbot_embedder is not None and _chatbot_collection is not None:
         return
 
+    print("Initializing vector store...")
 
     try:
+        from langchain.text_splitter import RecursiveCharacterTextSplitter
+        from sentence_transformers import SentenceTransformer
+        import chromadb
+        from chromadb.config import Settings
+
+        # Clean up old database
+        shutil.rmtree(_chroma_db_dir, ignore_errors=True)
+        os.makedirs(_chroma_db_dir, exist_ok=True)
+
+        # Load knowledge base
+        try:
+            with open(_knowledge_base_path, encoding="utf-8") as f:
+                raw_text = f.read()
+            print(f"Loaded knowledge base with {len(raw_text)} characters")
+        except FileNotFoundError:
+            print("Knowledge base file not found, using default text")
+            raw_text = (
+                "Codingo is an AI-powered recruitment platform designed to "
+                "streamline job applications, candidate screening, and hiring. "
+                "We make hiring smarter, faster, and fairer through automation "
+                "and intelligent recommendations."
+            )
 
+        # Split text
+        splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
+        docs = [doc.strip() for doc in splitter.split_text(raw_text) if doc.strip()]
+        print(f"Split into {len(docs)} documents")
+
+        # Initialize embedder
+        print("Loading sentence transformer...")
+        embedder = SentenceTransformer("all-MiniLM-L6-v2")
+        print("Encoding documents...")
+        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
+        print(f"Created {len(embeddings)} embeddings")
+
+        # Initialize ChromaDB (use in-memory for HF Spaces)
+        print("Initializing ChromaDB...")
+        client = chromadb.Client(Settings(
+            anonymized_telemetry=False,
+            is_persistent=False,  # Changed to False for HF Spaces
+        ))
+
+        # Create collection
+        try:
+            client.delete_collection("chatbot")
+        except:
+            pass
+
+        collection = client.create_collection("chatbot")
+
+        # Add documents
+        ids = [f"doc_{i}" for i in range(len(docs))]
+        collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
+        print(f"Added {len(docs)} documents to collection")
+
+        _chatbot_embedder = embedder
+        _chatbot_collection = collection
+        print("Vector store initialization complete")
+
+    except Exception as e:
+        print(f"Error during vector store initialization: {e}")
+        traceback.print_exc()
+        raise
 
 def get_chatbot_response(query: str) -> str:
     try:
+        print(f"\n=== Processing query: {query} ===")
+
         if not query or not query.strip():
             return "Please type a question about the Codingo platform."
 
+        # Clear GPU cache
         import torch
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
+            print("Cleared GPU cache")
 
+        # Initialize components
+        try:
+            _init_vector_store()
+        except Exception as e:
+            print(f"Vector store initialization failed: {e}")
+            return "I'm having trouble accessing my knowledge base. Please try again later."
+
+        try:
+            _init_hf_model()
+        except Exception as e:
+            print(f"Model initialization failed: {e}")
+            return "I'm having trouble loading my language model. Please try again later."
 
         embedder = _chatbot_embedder
         collection = _chatbot_collection
         model = _hf_model
         tokenizer = _hf_tokenizer
 
         # Get relevant documents
+        print("Creating query embedding...")
         query_embedding = embedder.encode([query])[0]
+
+        print("Searching for relevant documents...")
         results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=3)
         retrieved_docs = results.get("documents", [[]])[0] if results else []
+        context = "\n".join(retrieved_docs[:3]) if retrieved_docs else ""
+        print(f"Retrieved {len(retrieved_docs)} documents")
+
+        # Prepare prompt
         if hasattr(model, 'model_type') and model.model_type == "seq2seq":
             prompt = f"Context: {context}\n\nUser: {query}\nAssistant:"
         else:
+            # For DialoGPT or other causal models
+            prompt = f"Context: {context}\n\nUser: {query}\nLUNA AI:"
+
+        print(f"Prompt length: {len(prompt)} characters")
+
+        # Tokenize
+        print("Tokenizing input...")
+        try:
+            inputs = tokenizer(
+                prompt,
+                return_tensors="pt",
+                truncation=True,
+                max_length=400,  # Reduced for safety
+                padding=True,
+                return_attention_mask=True
             )
+            print(f"Input shape: {inputs['input_ids'].shape}")
+        except Exception as e:
+            print(f"Tokenization error: {e}")
+            traceback.print_exc()
+            return "I had trouble processing your input. Please try a shorter question."
+
+        # Move to device
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+        # Generate response
+        print("Generating response...")
         with torch.no_grad():
             try:
+                output_ids = model.generate(
+                    input_ids=inputs['input_ids'],
+                    attention_mask=inputs['attention_mask'],
+                    max_new_tokens=100,  # Reduced for safety
+                    min_length=10,
+                    num_beams=2,  # Reduced for memory
+                    do_sample=True,
+                    temperature=0.8,
+                    pad_token_id=tokenizer.pad_token_id,
+                    eos_token_id=tokenizer.eos_token_id,
+                    early_stopping=True,
+                )
+                print(f"Output shape: {output_ids.shape}")
+            except Exception as e:
+                print(f"Generation error: {e}")
+                traceback.print_exc()
+
+                # Try a simpler generation
+                try:
+                    print("Trying simpler generation...")
                     output_ids = model.generate(
                         input_ids=inputs['input_ids'],
+                        max_new_tokens=50,
                         pad_token_id=tokenizer.pad_token_id,
                     )
+                except Exception as e2:
+                    print(f"Simple generation also failed: {e2}")
+                    return "I'm having trouble generating a response. Please try again."
 
+        # Decode response
+        print("Decoding response...")
         response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        print(f"Raw response: {response[:100]}...")
 
+        # Clean up response
+        if "LUNA AI:" in response:
             response = response.split("LUNA AI:")[-1].strip()
+        elif "Assistant:" in response:
+            response = response.split("Assistant:")[-1].strip()
 
+        # Remove the input if it's in the response
         if query in response:
+            response = response.replace(query, "").strip()
+
+        # Final cleanup
+        response = response.strip()
+
+        if not response or len(response) < 5:
+            response = "I'm here to help you with questions about the Codingo platform. What would you like to know?"
+
+        print(f"Final response: {response}")
+        return response
 
     except Exception as e:
+        print(f"Unexpected error in get_chatbot_response: {e}")
         traceback.print_exc()
+        return "I apologize, but I encountered an unexpected error. Please try again with a different question."