wt002 committed on
Commit
ecbb679
·
verified ·
1 Parent(s): b7e4e52

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +22 -8
agent.py CHANGED
@@ -29,7 +29,6 @@ from langchain.schema import Document
29
  import requests
30
  import json
31
  #from langchain.embeddings import HuggingFaceEmbeddings
32
- from langchain.vectorstores import FAISS
33
  from langchain.schema import Document
34
  #from langchain.agents import create_retriever_tool
35
 
@@ -149,18 +148,33 @@ sys_msg = SystemMessage(content=system_prompt)
149
  # -------------------------------
150
  # Step 1: Load JSON data from URL
151
  # -------------------------------
152
- json_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/blob/main/questions.json" # Replace with your actual JSON URL
153
- response = requests.get(json_url)
154
 
155
  # Ensure the request was successful
156
  if response.status_code != 200:
157
- raise Exception(f"Failed to load JSON from {json_url}. Status code: {response.status_code}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
- # Parse the JSON content
160
- data = response.json()
161
 
162
- # Make sure we have the correct structure in the JSON
163
- assert isinstance(data, list), "The JSON should contain a list of documents."
164
 
165
  # -------------------------------
166
  # Step 2: Prepare documents
 
29
  import requests
30
  import json
31
  #from langchain.embeddings import HuggingFaceEmbeddings
 
32
  from langchain.schema import Document
33
  #from langchain.agents import create_retriever_tool
34
 
 
148
  # -------------------------------
149
  # Step 1: Load JSON data from URL
150
  # -------------------------------
151
+ jsonl_url = "https://example.com/documents.jsonl" # Replace with your actual JSONL URL
152
+ response = requests.get(jsonl_url)
153
 
154
  # Ensure the request was successful
155
  if response.status_code != 200:
156
+ raise Exception(f"Failed to load JSONL from {jsonl_url}. Status code: {response.status_code}")
157
+
158
+
159
+ # Read and parse the JSONL file line by line
160
+ docs = []
161
+ for line in response.text.splitlines():
162
+ try:
163
+ doc = json.loads(line) # Parse each line as a separate JSON object
164
+ content = doc.get('content', "").strip()
165
+ if not content:
166
+ continue # Skip documents with no content
167
+
168
+ # Add unique ID to each document
169
+ doc['id'] = str(uuid.uuid4())
170
+
171
+ # Convert the document into a Document object
172
+ docs.append(Document(page_content=content, metadata=doc))
173
+
174
+ except json.JSONDecodeError:
175
+ print("Skipping malformed JSONL line.")
176
 
 
 
177
 
 
 
178
 
179
  # -------------------------------
180
  # Step 2: Prepare documents