ak3ra committed on
Commit
7ddc93d
·
1 Parent(s): 05d5b78

add csv export

Browse files
Files changed (5) hide show
  1. app.py +153 -79
  2. config.py +2 -0
  3. rag/rag_pipeline.py +1 -0
  4. utils/helpers.py +15 -13
  5. utils/zotero_manager.py +3 -1
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import json
2
  from typing import List, Tuple
3
  import os
@@ -8,7 +10,12 @@ from dotenv import load_dotenv
8
  from slugify import slugify
9
 
10
  from rag.rag_pipeline import RAGPipeline
11
- from utils.helpers import generate_follow_up_questions, append_to_study_files, add_study_files_to_chromadb, chromadb_client
 
 
 
 
 
12
  from utils.prompts import (
13
  highlight_prompt,
14
  evidence_based_prompt,
@@ -19,6 +26,11 @@ import openai
19
  from config import STUDY_FILES, OPENAI_API_KEY
20
  from utils.zotero_manager import ZoteroManager
21
 
 
 
 
 
 
22
  load_dotenv()
23
  logging.basicConfig(level=logging.INFO)
24
 
@@ -30,7 +42,10 @@ add_study_files_to_chromadb("study_files.json", "study_files_collection")
30
  # Cache for RAG pipelines
31
  rag_cache = {}
32
 
33
- def process_zotero_library_items(zotero_library_id: str, zotero_api_access_key: str) -> str:
 
 
 
34
  if not zotero_library_id or not zotero_api_access_key:
35
  return "Please enter your zotero library Id and API Access Key"
36
 
@@ -46,9 +61,13 @@ def process_zotero_library_items(zotero_library_id: str, zotero_api_access_key:
46
  )
47
 
48
  zotero_collections = zotero_manager.get_collections()
49
- zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
 
 
50
  filtered_zotero_collection_lists = (
51
- zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
 
 
52
  )
53
 
54
  study_files_data = {} # Dictionary to collect items for ChromaDB
@@ -62,12 +81,16 @@ def process_zotero_library_items(zotero_library_id: str, zotero_api_access_key:
62
  zotero_manager.get_collection_zotero_items_by_key(collection_key)
63
  )
64
  #### Export zotero collection items to json ####
65
- zotero_items_json = zotero_manager.zotero_items_to_json(zotero_collection_items)
 
 
66
  export_file = f"{slugify(collection_name)}_zotero_items.json"
67
  zotero_manager.write_zotero_items_to_json_file(
68
  zotero_items_json, f"data/{export_file}"
69
  )
70
- append_to_study_files("study_files.json", collection_name, f"data/{export_file}")
 
 
71
 
72
  # Collect for ChromaDB
73
  study_files_data[collection_name] = f"data/{export_file}"
@@ -75,13 +98,13 @@ def process_zotero_library_items(zotero_library_id: str, zotero_api_access_key:
75
  # Update in-memory STUDY_FILES for reference in current session
76
  STUDY_FILES.update({collection_name: f"data/{export_file}"})
77
  logging.info(f"STUDY_FILES: {STUDY_FILES}")
78
-
79
  # After loop, add all collected data to ChromaDB
80
  add_study_files_to_chromadb("study_files.json", "study_files_collection")
81
  message = "Successfully processed items in your zotero library"
82
  except Exception as e:
83
  message = f"Error process your zotero library: {str(e)}"
84
-
85
  return message
86
 
87
 
@@ -93,11 +116,11 @@ def get_rag_pipeline(study_name: str) -> RAGPipeline:
93
  result = collection.get(ids=[study_name]) # Retrieve document by ID
94
 
95
  # Check if the result contains the requested document
96
- if not result or len(result['metadatas']) == 0:
97
  raise ValueError(f"Invalid study name: {study_name}")
98
 
99
  # Extract the file path from the document metadata
100
- study_file = result['metadatas'][0].get("file_path")
101
  if not study_file:
102
  raise ValueError(f"File path not found for study name: {study_name}")
103
 
@@ -107,9 +130,7 @@ def get_rag_pipeline(study_name: str) -> RAGPipeline:
107
  return rag_cache[study_name]
108
 
109
 
110
- def chat_function(
111
- message: str, study_name: str, prompt_type: str
112
- ) -> str:
113
  """Process a chat message and generate a response using the RAG pipeline."""
114
 
115
  if not message.strip():
@@ -134,11 +155,11 @@ def get_study_info(study_name: str) -> str:
134
  logging.info(f"Result: ======> {result}")
135
 
136
  # Check if the document exists in the result
137
- if not result or len(result['metadatas']) == 0:
138
  raise ValueError(f"Invalid study name: {study_name}")
139
 
140
  # Extract the file path from the document metadata
141
- study_file = result['metadatas'][0].get("file_path")
142
  logging.info(f"study_file: =======> {study_file}")
143
  if not study_file:
144
  raise ValueError(f"File path not found for study name: {study_name}")
@@ -148,6 +169,34 @@ def get_study_info(study_name: str) -> str:
148
  return f"### Number of documents: {len(data)}"
149
 
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  def update_interface(study_name: str) -> Tuple[str, gr.update, gr.update, gr.update]:
152
  """Update the interface based on the selected study."""
153
 
@@ -163,13 +212,14 @@ def update_interface(study_name: str) -> Tuple[str, gr.update, gr.update, gr.upd
163
  def set_question(question: str) -> str:
164
  return question.lstrip("✨ ")
165
 
 
166
  def process_multi_input(text, study_name, prompt_type):
167
  # Split input based on commas and strip any extra spaces
168
- variable_list = [word.strip().upper() for word in text.split(',')]
169
- user_message =f"Extract and present in a tabular format the following variables for each {study_name} study: {', '.join(variable_list)}"
170
  logging.info(f"User message: ==> {user_message}")
171
  response = chat_function(user_message, study_name, prompt_type)
172
- return response
173
 
174
 
175
  def create_gr_interface() -> gr.Blocks:
@@ -189,32 +239,46 @@ def create_gr_interface() -> gr.Blocks:
189
 
190
  with gr.Blocks() as demo:
191
  gr.Markdown("# ACRES RAG Platform")
192
-
193
  with gr.Row():
194
  with gr.Column(scale=1):
195
  gr.Markdown("### Zotero Credentials")
196
- zotero_library_id = gr.Textbox(label="Zotero Library ID", type="password", placeholder="Enter Your Zotero Library ID here...")
197
- zotero_api_access_key = gr.Textbox(label="Zotero API Access Key", type="password", placeholder="Enter Your Zotero API Access Key...")
 
 
 
 
 
 
 
 
198
  process_zotero_btn = gr.Button("Process your Zotero Library")
199
  zotero_output = gr.Markdown(label="Zotero")
200
 
201
  gr.Markdown("### Study Information")
202
 
203
  # Query ChromaDB for all document IDs in the "study_files_collection" collection
204
- collection = chromadb_client.get_or_create_collection("study_files_collection")
 
 
205
  # Retrieve all documents by querying with an empty string and specifying a high n_results
206
  all_documents = collection.query(query_texts=[""], n_results=1000)
207
  logging.info(f"all_documents: =========> {all_documents}")
208
  # Extract document IDs as study names
209
  document_ids = all_documents.get("ids")
210
- study_choices = [doc_id for doc_id in document_ids[0] if document_ids] # Get list of document IDs
 
 
211
  logging.info(f"study_choices: ======> {study_choices}")
212
 
213
  # Update the Dropdown with choices from ChromaDB
214
  study_dropdown = gr.Dropdown(
215
  choices=study_choices,
216
  label="Select Study",
217
- value=study_choices[0] if study_choices else None, # Set first choice as default, if available
 
 
218
  )
219
 
220
  study_info = gr.Markdown(label="Study Details")
@@ -226,7 +290,7 @@ def create_gr_interface() -> gr.Blocks:
226
  value="Default",
227
  )
228
  # clear = gr.Button("Clear Chat")
229
-
230
  with gr.Column(scale=3):
231
  gr.Markdown("### Study Variables")
232
  with gr.Row():
@@ -239,59 +303,52 @@ def create_gr_interface() -> gr.Blocks:
239
  )
240
  submit_btn = gr.Button("Submit", scale=1)
241
  answer_output = gr.Markdown(label="Answer")
 
 
 
 
 
 
 
 
242
 
243
- def user(
244
- user_message: str, history: List[List[str]]
245
- ) -> Tuple[str, List[List[str]]]:
246
- return "", (
247
- history + [[user_message, None]] if user_message.strip() else history
248
- )
249
-
250
- def bot(
251
- history: List[List[str]], study_name: str, prompt_type: str
252
- ) -> List[List[str]]:
253
- """
254
- Generate bot response and update the interface.
255
-
256
- This function:
257
- 1. Processes the latest user message
258
- 2. Generates a response using the RAG pipeline
259
- 3. Updates the chat history
260
- 4. Generates follow-up questions
261
- 5. Prepares interface updates for follow-up buttons
262
-
263
- Args:
264
- history (List[List[str]]): The current chat history.
265
- study_name (str): The name of the current study.
266
- prompt_type (str): The type of prompt being used.
267
-
268
- Returns:
269
- Tuple[List[List[str]], gr.update, gr.update, gr.update]:
270
- Updated chat history and interface components for follow-up questions.
271
- """
272
- if not history:
273
- return history, [], [], []
274
-
275
- user_message = history[-1][0]
276
- bot_message = chat_function(user_message, history, study_name, prompt_type)
277
- history[-1][1] = bot_message
278
-
279
- return history
280
-
281
- # msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
282
- # bot,
283
- # [chatbot, study_dropdown, prompt_type],
284
- # [chatbot, *follow_up_btns],
285
- # )
286
- # send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
287
- # bot,
288
- # [chatbot, study_dropdown, prompt_type],
289
- # [chatbot, *follow_up_btns],
290
- # )
291
- # for btn in follow_up_btns + sample_btns:
292
- # btn.click(set_question, inputs=[btn], outputs=[msg])
293
-
294
- # clear.click(lambda: None, None, chatbot, queue=False)
295
 
296
  study_dropdown.change(
297
  fn=get_study_info,
@@ -299,8 +356,25 @@ def create_gr_interface() -> gr.Blocks:
299
  outputs=[study_info],
300
  )
301
 
302
- process_zotero_btn.click(process_zotero_library_items, inputs=[zotero_library_id, zotero_api_access_key], outputs=[zotero_output], queue=False)
303
- submit_btn.click(process_multi_input, inputs=[study_variables, study_dropdown, prompt_type], outputs=[answer_output], queue=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
  return demo
306
 
 
1
+ # app.py
2
+
3
  import json
4
  from typing import List, Tuple
5
  import os
 
10
  from slugify import slugify
11
 
12
  from rag.rag_pipeline import RAGPipeline
13
+ from utils.helpers import (
14
+ generate_follow_up_questions,
15
+ append_to_study_files,
16
+ add_study_files_to_chromadb,
17
+ chromadb_client,
18
+ )
19
  from utils.prompts import (
20
  highlight_prompt,
21
  evidence_based_prompt,
 
26
  from config import STUDY_FILES, OPENAI_API_KEY
27
  from utils.zotero_manager import ZoteroManager
28
 
29
+ import csv
30
+ import io
31
+
32
+ import datetime
33
+
34
  load_dotenv()
35
  logging.basicConfig(level=logging.INFO)
36
 
 
42
  # Cache for RAG pipelines
43
  rag_cache = {}
44
 
45
+
46
+ def process_zotero_library_items(
47
+ zotero_library_id: str, zotero_api_access_key: str
48
+ ) -> str:
49
  if not zotero_library_id or not zotero_api_access_key:
50
  return "Please enter your zotero library Id and API Access Key"
51
 
 
61
  )
62
 
63
  zotero_collections = zotero_manager.get_collections()
64
+ zotero_collection_lists = zotero_manager.list_zotero_collections(
65
+ zotero_collections
66
+ )
67
  filtered_zotero_collection_lists = (
68
+ zotero_manager.filter_and_return_collections_with_items(
69
+ zotero_collection_lists
70
+ )
71
  )
72
 
73
  study_files_data = {} # Dictionary to collect items for ChromaDB
 
81
  zotero_manager.get_collection_zotero_items_by_key(collection_key)
82
  )
83
  #### Export zotero collection items to json ####
84
+ zotero_items_json = zotero_manager.zotero_items_to_json(
85
+ zotero_collection_items
86
+ )
87
  export_file = f"{slugify(collection_name)}_zotero_items.json"
88
  zotero_manager.write_zotero_items_to_json_file(
89
  zotero_items_json, f"data/{export_file}"
90
  )
91
+ append_to_study_files(
92
+ "study_files.json", collection_name, f"data/{export_file}"
93
+ )
94
 
95
  # Collect for ChromaDB
96
  study_files_data[collection_name] = f"data/{export_file}"
 
98
  # Update in-memory STUDY_FILES for reference in current session
99
  STUDY_FILES.update({collection_name: f"data/{export_file}"})
100
  logging.info(f"STUDY_FILES: {STUDY_FILES}")
101
+
102
  # After loop, add all collected data to ChromaDB
103
  add_study_files_to_chromadb("study_files.json", "study_files_collection")
104
  message = "Successfully processed items in your zotero library"
105
  except Exception as e:
106
  message = f"Error process your zotero library: {str(e)}"
107
+
108
  return message
109
 
110
 
 
116
  result = collection.get(ids=[study_name]) # Retrieve document by ID
117
 
118
  # Check if the result contains the requested document
119
+ if not result or len(result["metadatas"]) == 0:
120
  raise ValueError(f"Invalid study name: {study_name}")
121
 
122
  # Extract the file path from the document metadata
123
+ study_file = result["metadatas"][0].get("file_path")
124
  if not study_file:
125
  raise ValueError(f"File path not found for study name: {study_name}")
126
 
 
130
  return rag_cache[study_name]
131
 
132
 
133
+ def chat_function(message: str, study_name: str, prompt_type: str) -> str:
 
 
134
  """Process a chat message and generate a response using the RAG pipeline."""
135
 
136
  if not message.strip():
 
155
  logging.info(f"Result: ======> {result}")
156
 
157
  # Check if the document exists in the result
158
+ if not result or len(result["metadatas"]) == 0:
159
  raise ValueError(f"Invalid study name: {study_name}")
160
 
161
  # Extract the file path from the document metadata
162
+ study_file = result["metadatas"][0].get("file_path")
163
  logging.info(f"study_file: =======> {study_file}")
164
  if not study_file:
165
  raise ValueError(f"File path not found for study name: {study_name}")
 
169
  return f"### Number of documents: {len(data)}"
170
 
171
 
172
def _is_table_delimiter_row(cells) -> bool:
    """Return True when every cell looks like a markdown delimiter (``---``, ``:--:``)."""

    def is_delimiter_cell(cell: str) -> bool:
        # Allow at most one alignment colon on each side, then only hyphens.
        core = cell[1:] if cell.startswith(":") else cell
        core = core[:-1] if core.endswith(":") else core
        return bool(core) and set(core) == {"-"}

    return bool(cells) and all(is_delimiter_cell(cell) for cell in cells)


def markdown_table_to_csv(markdown_text: str) -> str:
    """Convert the markdown table rows found in *markdown_text* to CSV text.

    Only lines that start with ``|`` (after stripping whitespace) are treated
    as table rows; everything else is ignored. The header/body delimiter row
    (cells made only of hyphens and optional alignment colons) is dropped.

    Empty cells are preserved so that values stay in their own columns, data
    rows that merely contain ``---`` inside a cell are kept, and escaped pipes
    (``\\|``) are emitted as a literal ``|`` inside the cell.

    Args:
        markdown_text: Text that may contain one or more markdown tables.

    Returns:
        The rows serialized with ``csv.writer``, or ``""`` when no table row
        is present.
    """
    # Stand-in for escaped pipes while splitting on the real column separators.
    escaped_pipe_marker = "\x00"

    csv_rows = []
    for raw_line in markdown_text.split("\n"):
        line = raw_line.strip()
        if not line.startswith("|"):
            continue

        # Drop the leading pipe, protect escaped pipes, then drop the closing
        # pipe (if any) so the outer borders do not create phantom cells.
        inner = line[1:].replace("\\|", escaped_pipe_marker)
        if inner.endswith("|"):
            inner = inner[:-1]

        cells = [
            cell.strip().replace(escaped_pipe_marker, "|")
            for cell in inner.split("|")
        ]

        if _is_table_delimiter_row(cells):
            continue
        csv_rows.append(cells)

    if not csv_rows:
        return ""

    output = io.StringIO()
    csv.writer(output).writerows(csv_rows)
    return output.getvalue()
198
+
199
+
200
  def update_interface(study_name: str) -> Tuple[str, gr.update, gr.update, gr.update]:
201
  """Update the interface based on the selected study."""
202
 
 
212
  def set_question(question: str) -> str:
213
  return question.lstrip("✨ ")
214
 
215
+
216
def process_multi_input(text, study_name, prompt_type):
    """Ask the RAG pipeline to tabulate the requested variables for a study.

    Args:
        text: Comma-separated variable names typed by the user.
        study_name: Name of the selected study.
        prompt_type: Prompt style forwarded to ``chat_function``.

    Returns:
        A two-item list: the markdown answer, and a ``gr.update`` that controls
        the visibility of the CSV download button. The button is only revealed
        when a request was actually sent.
    """
    # Split on commas, normalise, and drop blanks left by stray/trailing commas
    # so the model is never asked about an empty variable name.
    variable_list = [
        word.strip().upper() for word in (text or "").split(",") if word.strip()
    ]
    if not variable_list:
        return [
            "Please enter at least one variable to extract.",
            gr.update(visible=False),
        ]

    user_message = f"Extract and present in a tabular format the following variables for each {study_name} study: {', '.join(variable_list)}"
    logging.info(f"User message: ==> {user_message}")
    response = chat_function(user_message, study_name, prompt_type)
    return [response, gr.update(visible=True)]
223
 
224
 
225
  def create_gr_interface() -> gr.Blocks:
 
239
 
240
  with gr.Blocks() as demo:
241
  gr.Markdown("# ACRES RAG Platform")
242
+
243
  with gr.Row():
244
  with gr.Column(scale=1):
245
  gr.Markdown("### Zotero Credentials")
246
+ zotero_library_id = gr.Textbox(
247
+ label="Zotero Library ID",
248
+ type="password",
249
+ placeholder="Enter Your Zotero Library ID here...",
250
+ )
251
+ zotero_api_access_key = gr.Textbox(
252
+ label="Zotero API Access Key",
253
+ type="password",
254
+ placeholder="Enter Your Zotero API Access Key...",
255
+ )
256
  process_zotero_btn = gr.Button("Process your Zotero Library")
257
  zotero_output = gr.Markdown(label="Zotero")
258
 
259
  gr.Markdown("### Study Information")
260
 
261
  # Query ChromaDB for all document IDs in the "study_files_collection" collection
262
+ collection = chromadb_client.get_or_create_collection(
263
+ "study_files_collection"
264
+ )
265
  # Retrieve all documents by querying with an empty string and specifying a high n_results
266
  all_documents = collection.query(query_texts=[""], n_results=1000)
267
  logging.info(f"all_documents: =========> {all_documents}")
268
  # Extract document IDs as study names
269
  document_ids = all_documents.get("ids")
270
+ study_choices = [
271
+ doc_id for doc_id in document_ids[0] if document_ids
272
+ ] # Get list of document IDs
273
  logging.info(f"study_choices: ======> {study_choices}")
274
 
275
  # Update the Dropdown with choices from ChromaDB
276
  study_dropdown = gr.Dropdown(
277
  choices=study_choices,
278
  label="Select Study",
279
+ value=(
280
+ study_choices[0] if study_choices else None
281
+ ), # Set first choice as default, if available
282
  )
283
 
284
  study_info = gr.Markdown(label="Study Details")
 
290
  value="Default",
291
  )
292
  # clear = gr.Button("Clear Chat")
293
+
294
  with gr.Column(scale=3):
295
  gr.Markdown("### Study Variables")
296
  with gr.Row():
 
303
  )
304
  submit_btn = gr.Button("Submit", scale=1)
305
  answer_output = gr.Markdown(label="Answer")
306
+ # button to download_csv
307
+ download_btn = gr.DownloadButton(
308
+ "Download as CSV",
309
+ variant="primary",
310
+ size="sm",
311
+ scale=1,
312
+ visible=False,
313
+ )
314
 
315
def download_as_csv(markdown_content):
    """Convert a markdown table to CSV and return the path of the file to download.

    Args:
        markdown_content: Markdown text currently shown in the answer panel.

    Returns:
        Path of the CSV file written to the working directory, or ``None``
        when there is no content or no table could be extracted.
    """
    if not markdown_content:
        return None

    csv_content = markdown_table_to_csv(markdown_content)
    if not csv_content:
        return None

    # Microsecond resolution keeps two exports made within the same second
    # from overwriting each other. The "study_export_" prefix and ".csv"
    # suffix are what the cleanup step matches on.
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    temp_path = f"study_export_{timestamp}.csv"

    # newline="" because csv_content already carries the csv module's own
    # line terminators.
    with open(temp_path, "w", newline="", encoding="utf-8") as f:
        f.write(csv_content)

    return temp_path
332
+
333
def cleanup_temp_files():
    """Remove stale ``study_export_*.csv`` files from the working directory.

    A file is stale once its modification time is more than five minutes in
    the past. This is best-effort: failures are logged as warnings and never
    raised, and a problem with one file does not stop the others from being
    processed.
    """
    max_age_seconds = 5 * 60
    current_time = datetime.datetime.now()

    try:
        candidates = [
            name
            for name in os.listdir()
            if name.startswith("study_export_") and name.endswith(".csv")
        ]
    except OSError as e:
        logging.warning(f"Error during cleanup: {e}")
        return

    for name in candidates:
        try:
            file_time = datetime.datetime.fromtimestamp(os.path.getmtime(name))
            # total_seconds() covers the whole age; .seconds would drop the
            # days component and make very old files look fresh.
            if (current_time - file_time).total_seconds() > max_age_seconds:
                os.remove(name)
        except (OSError, OverflowError, ValueError) as e:
            logging.warning(f"Failed to remove temp file {name}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
 
353
  study_dropdown.change(
354
  fn=get_study_info,
 
356
  outputs=[study_info],
357
  )
358
 
359
+ process_zotero_btn.click(
360
+ process_zotero_library_items,
361
+ inputs=[zotero_library_id, zotero_api_access_key],
362
+ outputs=[zotero_output],
363
+ queue=False,
364
+ )
365
+ submit_btn.click(
366
+ process_multi_input,
367
+ inputs=[study_variables, study_dropdown, prompt_type],
368
+ outputs=[answer_output, download_btn],
369
+ queue=False,
370
+ )
371
+ download_btn.click(
372
+ fn=download_as_csv,
373
+ inputs=[answer_output],
374
+ outputs=[download_btn],
375
+ ).then(
376
+ fn=cleanup_temp_files, inputs=None, outputs=None # Clean up after download
377
+ )
378
 
379
  return demo
380
 
config.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import os
2
 
3
  from dotenv import load_dotenv
 
1
+ # config.py
2
+
3
  import os
4
 
5
  from dotenv import load_dotenv
rag/rag_pipeline.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import json
2
  import logging
3
  from typing import Dict, Any, List
 
1
+ # rag/rag_pipeline.py
2
  import json
3
  import logging
4
  from typing import Dict, Any, List
utils/helpers.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from typing import Dict, Any
2
  from llama_index.core import Response
3
  from typing import List
@@ -29,7 +31,7 @@ def read_study_files(file_path):
29
  Raises:
30
  FileNotFoundError: If the file is not found at the provided path.
31
  json.JSONDecodeError: If the file contents are not valid JSON.
32
-
33
  Example:
34
  Given a JSON file 'study_files.json' with content like:
35
  {
@@ -46,13 +48,15 @@ def read_study_files(file_path):
46
  }
47
  """
48
  try:
49
- with open(file_path, 'r') as file:
50
  data = json.load(file)
51
  return data
52
  except FileNotFoundError as e:
53
  raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
54
  except json.JSONDecodeError as e:
55
- raise ValueError(f"The file at path {file_path} does not contain valid JSON.") from e
 
 
56
 
57
 
58
  def append_to_study_files(file_path, new_key, new_value):
@@ -86,20 +90,22 @@ def append_to_study_files(file_path, new_key, new_value):
86
  """
87
  try:
88
  # Read the existing data from the file
89
- with open(file_path, 'r') as file:
90
  data = json.load(file)
91
-
92
  # Append the new key-value pair to the dictionary
93
  data[new_key] = new_value
94
 
95
  # Write the updated data back to the file
96
- with open(file_path, 'w') as file:
97
  json.dump(data, file, indent=4) # indent for pretty printing
98
 
99
  except FileNotFoundError as e:
100
  raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
101
  except json.JSONDecodeError as e:
102
- raise ValueError(f"The file at path {file_path} does not contain valid JSON.") from e
 
 
103
  except IOError as e:
104
  raise IOError(f"Failed to write to the file at {file_path}.") from e
105
 
@@ -204,12 +210,8 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
204
  metadatas.append({"file_path": file_path}) # Metadata with file path
205
 
206
  # Add documents to the collection in batch
207
- collection.add(
208
- ids=ids,
209
- documents=documents,
210
- metadatas=metadatas
211
- )
212
-
213
  print("All study files have been successfully added to ChromaDB.")
214
 
215
 
 
1
+ # utils/helpers.py
2
+
3
  from typing import Dict, Any
4
  from llama_index.core import Response
5
  from typing import List
 
31
  Raises:
32
  FileNotFoundError: If the file is not found at the provided path.
33
  json.JSONDecodeError: If the file contents are not valid JSON.
34
+
35
  Example:
36
  Given a JSON file 'study_files.json' with content like:
37
  {
 
48
  }
49
  """
50
  try:
51
+ with open(file_path, "r") as file:
52
  data = json.load(file)
53
  return data
54
  except FileNotFoundError as e:
55
  raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
56
  except json.JSONDecodeError as e:
57
+ raise ValueError(
58
+ f"The file at path {file_path} does not contain valid JSON."
59
+ ) from e
60
 
61
 
62
  def append_to_study_files(file_path, new_key, new_value):
 
90
  """
91
  try:
92
  # Read the existing data from the file
93
+ with open(file_path, "r") as file:
94
  data = json.load(file)
95
+
96
  # Append the new key-value pair to the dictionary
97
  data[new_key] = new_value
98
 
99
  # Write the updated data back to the file
100
+ with open(file_path, "w") as file:
101
  json.dump(data, file, indent=4) # indent for pretty printing
102
 
103
  except FileNotFoundError as e:
104
  raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
105
  except json.JSONDecodeError as e:
106
+ raise ValueError(
107
+ f"The file at path {file_path} does not contain valid JSON."
108
+ ) from e
109
  except IOError as e:
110
  raise IOError(f"Failed to write to the file at {file_path}.") from e
111
 
 
210
  metadatas.append({"file_path": file_path}) # Metadata with file path
211
 
212
  # Add documents to the collection in batch
213
+ collection.add(ids=ids, documents=documents, metadatas=metadatas)
214
+
 
 
 
 
215
  print("All study files have been successfully added to ChromaDB.")
216
 
217
 
utils/zotero_manager.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import json
2
  import os
3
  from typing import Any, Dict, List, Optional
@@ -641,4 +643,4 @@ if __name__ == "__main__":
641
  ## Save to disc
642
  zotero_manager.write_zotero_items_to_json_file(
643
  ebora_virus_zotero_items_json, "zotero_data/ebora_virus_zotero_items.json"
644
- )
 
1
+ # utils/zotero_manager.py
2
+
3
  import json
4
  import os
5
  from typing import Any, Dict, List, Optional
 
643
  ## Save to disc
644
  zotero_manager.write_zotero_items_to_json_file(
645
  ebora_virus_zotero_items_json, "zotero_data/ebora_virus_zotero_items.json"
646
+ )