Final_Assignment_Project

Running

wt002 committed 7 days ago

Commit

ecbb679

verified ·

1 Parent(s): b7e4e52

Update agent.py

Files changed (1) hide show

agent.py CHANGED Viewed

@@ -29,7 +29,6 @@ from langchain.schema import Document
 import requests
 import json
 #from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
 from langchain.schema import Document
 #from langchain.agents import create_retriever_tool
@@ -149,18 +148,33 @@ sys_msg = SystemMessage(content=system_prompt)
 # -------------------------------
 # Step 1: Load JSON data from URL
 # -------------------------------
-json_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/blob/main/questions.json"  # Replace with your actual JSON URL
-response = requests.get(json_url)
 # Ensure the request was successful
 if response.status_code != 200:
-    raise Exception(f"Failed to load JSON from {json_url}. Status code: {response.status_code}")
-# Parse the JSON content
-data = response.json()
-# Make sure we have the correct structure in the JSON
-assert isinstance(data, list), "The JSON should contain a list of documents."
 # -------------------------------
 # Step 2: Prepare documents

 import requests
 import json
 #from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.schema import Document
 #from langchain.agents import create_retriever_tool
 # -------------------------------
 # Step 1: Load JSON data from URL
 # -------------------------------
+jsonl_url = "https://example.com/documents.jsonl"  # Replace with your actual JSONL URL
+response = requests.get(jsonl_url)
 # Ensure the request was successful
 if response.status_code != 200:
+    raise Exception(f"Failed to load JSONL from {jsonl_url}. Status code: {response.status_code}")
+# Read and parse the JSONL file line by line
+docs = []
+for line in response.text.splitlines():
+    try:
+        doc = json.loads(line)  # Parse each line as a separate JSON object
+        content = doc.get('content', "").strip()
+        if not content:
+            continue  # Skip documents with no content
+        # Add unique ID to each document
+        doc['id'] = str(uuid.uuid4())
+        # Convert the document into a Document object
+        docs.append(Document(page_content=content, metadata=doc))
+    except json.JSONDecodeError:
+        print("Skipping malformed JSONL line.")
 # -------------------------------
 # Step 2: Prepare documents