Final_Assignment_Project

Runtime error

App Files Files Community

wt002 committed 4 days ago

Commit

c63d0c0

verified ·

1 Parent(s): d3d7810

Update agent.py

Browse files

Files changed (1) hide show

agent.py +49 -53

agent.py CHANGED Viewed

@@ -39,6 +39,11 @@ from docx import Document as DocxDocument
 import openpyxl
 from io import StringIO
 load_dotenv()
 @tool
@@ -313,47 +318,55 @@ for task in tasks:
 # -------------------------------
 # Initialize HuggingFace Embedding model
 #embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
-embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
-# -----------------------------
-# Step 1: Load CSV Questions
-# -----------------------------
-csv_path = "questions.csv"  # Change to your CSV file
-df = pd.read_csv(csv_path)
-docs = []
-for _, row in df.iterrows():
-    question = str(row.get("question", "")).strip()
-    if question:
-        docs.append(Document(page_content=question, metadata={"source": "csv"}))
-docs = []
-for _, row in df.iterrows():
-    question = str(row.get("question", "")).strip()
-    if question:
-        docs.append(Document(page_content=question, metadata={"source": "csv"}))
-# -----------------------------
-# Step 2: Add Wikipedia Docs
-# -----------------------------
-wiki_docs = []
-for doc in docs:
-    try:
-        wiki_results = WikipediaLoader(query=doc.page_content, load_max_docs=1).load()
-        wiki_docs.extend(wiki_results)
-    except Exception as e:
-        print(f"Failed to load Wikipedia for: {doc.page_content} — {e}")
-all_docs = docs + wiki_docs
-# -----------------------------
-# Step 3: Build FAISS Index
-# -----------------------------
-embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
-vector_store = FAISS.from_documents(all_docs, embedding_model)
 vector_store.save_local("faiss_index")
 # -----------------------------
 # Step 4: Create Retriever Tool
 # -----------------------------
@@ -367,23 +380,6 @@ question_retriever_tool = create_retriever_tool(
-# -------------------------------
-# Step 5: Create Retriever Tool (for use in LangChain)
-# -------------------------------
-retriever = vector_store.as_retriever()
-# Create the retriever tool
-question_retriever_tool = create_retriever_tool(
-    retriever=retriever,
-    name="Question_Search",
-    description="A tool to retrieve documents related to a user's question."
-)
-vector_store = FAISS.from_documents(all_docs, embedding_model)
-vector_store.save_local("faiss_index")
 def retriever(state: MessagesState):
     """Retriever node using similarity scores for filtering"""
     query = state["messages"][0].content

 import openpyxl
 from io import StringIO
+from transformers import BertTokenizer, BertModel
+import torch
+#from langchain.embeddings import Embedding
 load_dotenv()
 @tool
 # -------------------------------
 # Initialize HuggingFace Embedding model
 #embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+#embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
+from transformers import BertTokenizer, BertModel
+import torch
+from langchain.embeddings.base import Embeddings
+from langchain.schema import Document
+class BERTEmbedding(Embeddings):
+    """Mean-pooled BERT embeddings exposed through LangChain's Embeddings interface.
+
+    Implements embed_documents / embed_query, which are the methods LangChain
+    vector stores call on an embedding object. The original embed() method is
+    kept so existing callers continue to work.
+    """
+    def __init__(self, model_name='bert-base-uncased'):
+        # Load the pre-trained BERT model and tokenizer
+        self.tokenizer = BertTokenizer.from_pretrained(model_name)
+        self.model = BertModel.from_pretrained(model_name)
+    def embed(self, texts):
+        """Return one embedding (a list of floats) per input text.
+
+        texts may be a single string or an iterable of strings; a single
+        string is treated as a batch of one.
+        """
+        if isinstance(texts, str):
+            texts = [texts]
+        texts = list(texts)
+        # Nothing to encode; skip the tokenizer/model call for an empty batch
+        if not texts:
+            return []
+        # Tokenize and convert texts to input format for BERT
+        inputs = self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
+        # Get the BERT embeddings (we use the last hidden state)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        hidden = outputs.last_hidden_state  # Shape: (batch_size, seq_len, hidden_dim)
+        # Average over real tokens only. A plain mean over seq_len would also
+        # average the padding positions, so a text's vector would change with
+        # the length of the other texts in the same batch.
+        mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
+        token_counts = mask.sum(dim=1).clamp(min=1)
+        embeddings = (hidden * mask).sum(dim=1) / token_counts  # Shape: (batch_size, hidden_dim)
+        # Return the embeddings as nested Python lists of floats
+        return embeddings.cpu().numpy().tolist()
+    def embed_documents(self, texts):
+        """Embed a list of document texts (LangChain Embeddings interface)."""
+        return self.embed(texts)
+    def embed_query(self, text):
+        """Embed a single query string (LangChain Embeddings interface)."""
+        return self.embed([text])[0]
+# Example usage of BERTEmbedding with LangChain
+embedding_model = BERTEmbedding(model_name="bert-base-uncased")
+# Sample text (replace with your own text)
+docs = [
+    Document(page_content="Mercedes Sosa was an Argentine singer and musician."),
+    Document(page_content="The 2000s were a significant decade for music in Latin America.")
+]
+# Get the embeddings for the documents
+embeddings = embedding_model.embed([doc.page_content for doc in docs])
+# Now, you can use the embeddings with FAISS or other retrieval systems
+# For example, with FAISS:
+from langchain.vectorstores import FAISS
+# Assuming 'docs' contains your list of documents and 'embedding_model' is the model you created
+vector_store = FAISS.from_documents(docs, embedding_model)
 vector_store.save_local("faiss_index")
 # -----------------------------
 # Step 4: Create Retriever Tool
 # -----------------------------
 def retriever(state: MessagesState):
     """Retriever node using similarity scores for filtering"""
     query = state["messages"][0].content