wt002 committed on
Commit
3df23ae
·
verified ·
1 Parent(s): 9ac015d

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +9 -68
agent.py CHANGED
@@ -340,79 +340,34 @@ for name in enabled_tool_names:
340
 
341
 
342
  # -------------------------------
343
- # Step 2: Load the JSON file or tasks (Replace this part if you're loading tasks dynamically)
344
- # -------------------------------
345
- from fastapi import FastAPI, Request
346
- from langchain_core.documents import Document
347
- import uuid
348
-
349
- app = FastAPI()
350
-
351
- @app.post("/start")
352
- async def start_questions(request: Request):
353
- data = await request.json()
354
- questions = data.get("questions", [])
355
-
356
- docs = []
357
- for task in questions:
358
- question_text = task.get("question", "").strip()
359
- if not question_text:
360
- continue
361
-
362
- task["id"] = str(uuid.uuid4())
363
- docs.append(Document(page_content=question_text, metadata=task))
364
-
365
- return {"message": f"Loaded {len(docs)} questions", "docs": [doc.page_content for doc in docs]}
366
-
367
-
368
-
369
-
370
- # -------------------------------
371
- # Step 4: Set up BERT Embeddings and FAISS VectorStore
372
  # -------------------------------
373
 
374
  # -----------------------------
375
- # 1. Define Custom BERT Embedding Model
376
  # -----------------------------
377
  import torch
378
  import torch.nn.functional as F
379
  from transformers import BertTokenizer, BertModel
380
- from langchain.embeddings import Embeddings
381
 
382
- class BERTEmbeddings(Embeddings):
383
- def __init__(self, model_name='bert-base-uncased', device='cpu'):
384
- # Initialize the tokenizer and model
385
  self.tokenizer = BertTokenizer.from_pretrained(model_name)
386
  self.model = BertModel.from_pretrained(model_name)
387
- self.model.eval() # Set model to eval mode
388
- self.device = device
389
- self.model.to(self.device) # Move model to the specified device (CPU or GPU)
390
 
391
  def embed_documents(self, texts):
392
- # Tokenize the input texts
393
- inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=512)
394
- inputs = {key: value.to(self.device) for key, value in inputs.items()} # Move inputs to the specified device
395
-
396
  with torch.no_grad():
397
  outputs = self.model(**inputs)
398
-
399
- # Get the embeddings by averaging the last hidden state across tokens
400
  embeddings = outputs.last_hidden_state.mean(dim=1)
401
-
402
- # Normalize embeddings for cosine similarity
403
- embeddings = F.normalize(embeddings, p=2, dim=1)
404
-
405
- # Return the embeddings as numpy array
406
  return embeddings.cpu().numpy()
407
 
408
  def embed_query(self, text):
409
- # Embed a single query (text)
410
  return self.embed_documents([text])[0]
411
 
412
 
413
- # -----------------------------
414
- # 2. Initialize Embedding Model
415
- # -----------------------------
416
 
417
  # -----------------------------
418
  # Create FAISS Vector Store
@@ -434,7 +389,7 @@ class MyVectorStore:
434
  return cls(index)
435
 
436
  # -----------------------------
437
- # 3. Prepare Documents
438
  # -----------------------------
439
  # Define the URL where the JSON file is hosted
440
  url = "https://agents-course-unit4-scoring.hf.space/questions"
@@ -467,11 +422,8 @@ loaded_vector_store = MyVectorStore.load_local("faiss_index.index")
467
 
468
 
469
 
470
-
471
-
472
-
473
  # -----------------------------
474
- # 6. Create LangChain Retriever Tool
475
  # -----------------------------
476
 
477
  retriever = FAISS.load_local("faiss_index.index", embedding_model).as_retriever()
@@ -502,17 +454,6 @@ llm = HuggingFaceEndpoint(
502
  )
503
 
504
 
505
- # No longer required as Langgraph is replacing Langchain
506
- # Initialize LangChain agent
507
- #agent = initialize_agent(
508
- # tools=tools,
509
- # llm=llm,
510
- # agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
511
- # verbose=True
512
- #)
513
-
514
-
515
-
516
 
517
  # -------------------------------
518
  # Step 8: Use the Planner, Classifier, and Decision Logic
 
340
 
341
 
342
  # -------------------------------
343
+ # Set up BERT Embeddings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  # -------------------------------
345
 
346
  # -----------------------------
347
+ # Define Custom BERT Embedding Model
348
  # -----------------------------
349
  import torch
350
  import torch.nn.functional as F
351
  from transformers import BertTokenizer, BertModel
 
352
 
353
+ class BERTEmbeddings:
354
+ def __init__(self, model_name='bert-base-uncased'):
 
355
  self.tokenizer = BertTokenizer.from_pretrained(model_name)
356
  self.model = BertModel.from_pretrained(model_name)
357
+ self.model.eval() # Set to evaluation mode
 
 
358
 
359
  def embed_documents(self, texts):
360
+ inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
 
 
 
361
  with torch.no_grad():
362
  outputs = self.model(**inputs)
 
 
363
  embeddings = outputs.last_hidden_state.mean(dim=1)
364
+ embeddings = F.normalize(embeddings, p=2, dim=1) # Normalize for cosine similarity
 
 
 
 
365
  return embeddings.cpu().numpy()
366
 
367
  def embed_query(self, text):
 
368
  return self.embed_documents([text])[0]
369
 
370
 
 
 
 
371
 
372
  # -----------------------------
373
  # Create FAISS Vector Store
 
389
  return cls(index)
390
 
391
  # -----------------------------
392
+ # Prepare Documents
393
  # -----------------------------
394
  # Define the URL where the JSON file is hosted
395
  url = "https://agents-course-unit4-scoring.hf.space/questions"
 
422
 
423
 
424
 
 
 
 
425
  # -----------------------------
426
+ # Create LangChain Retriever Tool
427
  # -----------------------------
428
 
429
  retriever = FAISS.load_local("faiss_index.index", embedding_model).as_retriever()
 
454
  )
455
 
456
 
 
 
 
 
 
 
 
 
 
 
 
457
 
458
  # -------------------------------
459
  # Step 8: Use the Planner, Classifier, and Decision Logic