vietexob committed on
Commit
ccf9ca7
·
1 Parent(s): d5e6064

Fixing errors of repeated texts in OpenAI model

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +10 -8
  3. llm_graph.py +13 -12
  4. sample/kv_store_doc_status.json +5 -21
.gitignore CHANGED
@@ -170,3 +170,4 @@ __marimo__/
170
  *.pdf
171
  *.csv
172
  cache/
 
 
170
  *.pdf
171
  *.csv
172
  cache/
173
+ sample/
app.py CHANGED
@@ -202,14 +202,15 @@ def create_graph(json_data, model_name=MODEL_LIST[0]):
202
 
203
  # Configure network display
204
  network.from_nx(G)
205
- network.barnes_hut(
206
- gravity=-3000,
207
- central_gravity=0.3,
208
- spring_length=50,
209
- spring_strength=0.001,
210
- damping=0.09,
211
- overlap=0,
212
- )
 
213
 
214
  # Customize node appearance
215
  for node in network.nodes:
@@ -253,6 +254,7 @@ def process_and_visualize(text, model_name, progress=gr.Progress()):
253
  is_first_example = text == EXAMPLES[0][0]
254
 
255
  # Ensure RAG is initialized
 
256
  asyncio.run(model.initialize_rag())
257
 
258
  # Try to load from cache if it's the first example
 
202
 
203
  # Configure network display
204
  network.from_nx(G)
205
+ if model_name == MODEL_LIST[0]:
206
+ network.barnes_hut(
207
+ gravity=-3000,
208
+ central_gravity=0.3,
209
+ spring_length=50,
210
+ spring_strength=0.001,
211
+ damping=0.09,
212
+ overlap=0,
213
+ )
214
 
215
  # Customize node appearance
216
  for node in network.nodes:
 
254
  is_first_example = text == EXAMPLES[0][0]
255
 
256
  # Ensure RAG is initialized
257
+ # TODO: Clear all the previous inserted texts
258
  asyncio.run(model.initialize_rag())
259
 
260
  # Try to load from cache if it's the first example
llm_graph.py CHANGED
@@ -45,17 +45,18 @@ class LLMGraph:
45
  Initialize the LightRAG instance with the specified embedding dimension.
46
  """
47
 
48
- if self.rag is None:
49
- self.rag = LightRAG(
50
- working_dir=WORKING_DIR,
51
- llm_model_func=self._llm_model_func,
52
- embedding_func=EmbeddingFunc(
53
- embedding_dim=embedding_dimension,
54
- max_token_size=8192,
55
- func=self._embedding_func,
56
- ),
57
- )
58
- # TODO: Check if this works as expected
 
59
  await self.rag.initialize_storages()
60
  await initialize_pipeline_status()
61
 
@@ -171,7 +172,7 @@ class LLMGraph:
171
  os.makedirs(WORKING_DIR, exist_ok=True)
172
 
173
  # Use LightRAG with Azure OpenAI
174
- # TODO: Clear all the previous inserted texts first
175
  self.rag.insert(text) # Insert the text into the RAG storage
176
 
177
  # Wait for GRAPHML_FILE to be created
 
45
  Initialize the LightRAG instance with the specified embedding dimension.
46
  """
47
 
48
+ # if self.rag is None:
49
+ # TODO: Check how to clear all the previous inserted texts
50
+ self.rag = LightRAG(
51
+ working_dir=WORKING_DIR,
52
+ llm_model_func=self._llm_model_func,
53
+ embedding_func=EmbeddingFunc(
54
+ embedding_dim=embedding_dimension,
55
+ max_token_size=8192,
56
+ func=self._embedding_func,
57
+ ),
58
+ )
59
+
60
  await self.rag.initialize_storages()
61
  await initialize_pipeline_status()
62
 
 
172
  os.makedirs(WORKING_DIR, exist_ok=True)
173
 
174
  # Use LightRAG with Azure OpenAI
175
+ # TODO: Clear all the previous inserted texts
176
  self.rag.insert(text) # Insert the text into the RAG storage
177
 
178
  # Wait for GRAPHML_FILE to be created
sample/kv_store_doc_status.json CHANGED
@@ -7,29 +7,13 @@
7
  ],
8
  "content_summary": "The family of Azerbaijan President Ilham Aliyev leads a charmed, glamorous life, thanks in part to financial interests in almost every sector of the economy. His wife, Mehriban, comes from the privileged and powerful Pashayev family that owns banks, ...",
9
  "content_length": 1074,
10
- "created_at": "2025-08-31T15:50:59.506391+00:00",
11
- "updated_at": "2025-08-31T15:52:26.018288+00:00",
12
  "file_path": "unknown_source",
13
- "track_id": "insert_20250831_235059_6946ff78",
14
  "metadata": {
15
- "processing_start_time": 1756655459,
16
- "processing_end_time": 1756655546
17
- }
18
- },
19
- "doc-eea199eb7feea197ebb82e9333a2d2f2": {
20
- "status": "processing",
21
- "chunks_count": 1,
22
- "chunks_list": [
23
- "chunk-eea199eb7feea197ebb82e9333a2d2f2"
24
- ],
25
- "content_summary": "Les jardins du Luxembourg, situés au cœur du sixième arrondissement de Paris, offrent un véritable havre de paix aux citadins pressés. Créés au début du dix-septième siècle sur l'initiative de Marie de Médicis, ces jardins à la française s'étendent s...",
26
- "content_length": 697,
27
- "created_at": "2025-08-31T15:54:38.060638+00:00",
28
- "updated_at": "2025-08-31T15:54:38.068349+00:00",
29
- "file_path": "unknown_source",
30
- "track_id": "insert_20250831_235438_22d326d7",
31
- "metadata": {
32
- "processing_start_time": 1756655678
33
  }
34
  }
35
  }
 
7
  ],
8
  "content_summary": "The family of Azerbaijan President Ilham Aliyev leads a charmed, glamorous life, thanks in part to financial interests in almost every sector of the economy. His wife, Mehriban, comes from the privileged and powerful Pashayev family that owns banks, ...",
9
  "content_length": 1074,
10
+ "created_at": "2025-09-01T14:53:10.896398+00:00",
11
+ "updated_at": "2025-09-01T14:54:44.898862+00:00",
12
  "file_path": "unknown_source",
13
+ "track_id": "insert_20250901_225310_a186b861",
14
  "metadata": {
15
+ "processing_start_time": 1756738390,
16
+ "processing_end_time": 1756738484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  }
18
  }
19
  }