vietexob committed
Commit a605fa2 · Parent: a179a95

Added Italian text

Files changed (4):
  1. CLAUDE.md +13 -2
  2. app.py +8 -3
  3. data/sample_it.txt +1 -0
  4. llm_graph.py +3 -2
CLAUDE.md CHANGED

@@ -40,7 +40,7 @@ AZURE_EMBEDDING_DEPLOYMENT=<embedding_deployment>
 AZURE_EMBEDDING_API_VERSION=<embedding_api_version>
 ```
 
-## Running the Application
+## Development Commands
 
 ```bash
 # Install dependencies
@@ -48,6 +48,9 @@ pip install -r requirements.txt
 
 # Run the Gradio app
 python app.py
+
+# Test model extraction directly
+python llm_graph.py
 ```
 
 ## Key Dependencies
@@ -76,4 +79,12 @@ The application expects JSON output with this schema:
   "nodes": [{"id": "entity", "type": "broad_type", "detailed_type": "specific_type"}],
   "edges": [{"from": "entity1", "to": "entity2", "label": "relationship"}]
 }
-```
+```
+
+## Testing and Development Notes
+
+- No formal test suite exists; manual testing through the Gradio interface
+- First example is automatically cached for performance on startup
+- Cache files stored in `cache/` directory as pickle files
+- Working directory `sample/` is cleared and recreated on each run
+- GraphML files generated by LightRAG for Azure OpenAI model backend
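For reference, a minimal sketch of what an extraction result matching the schema above looks like when loaded into networkx (the graph library imported in the llm_graph.py diff below); the entities here are invented for illustration:

```python
import json
import networkx as nx

# A hypothetical extraction result matching the documented schema.
raw = '''{
  "nodes": [
    {"id": "Lake Como", "type": "location", "detailed_type": "lake"},
    {"id": "Italy", "type": "location", "detailed_type": "country"}
  ],
  "edges": [
    {"from": "Lake Como", "to": "Italy", "label": "located in"}
  ]
}'''

data = json.loads(raw)

# Build a directed graph from the nodes/edges lists.
graph = nx.DiGraph()
for node in data["nodes"]:
    graph.add_node(node["id"], type=node["type"],
                   detailed_type=node["detailed_type"])
for edge in data["edges"]:
    graph.add_edge(edge["from"], edge["to"], label=edge["label"])

print(graph.number_of_nodes(), graph.number_of_edges())  # 2 1
```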
app.py CHANGED

@@ -59,6 +59,10 @@ text_es_file = "./data/sample_es.txt"
 with open(text_es_file, 'r', encoding='utf-8') as file:
     text_es = file.read()
 
+text_it_file = "./data/sample_it.txt"
+with open(text_it_file, 'r', encoding='utf-8') as file:
+    text_it = file.read()
+
 # Create cache directory if it doesn't exist
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.makedirs(WORKING_DIR, exist_ok=True)
@@ -95,11 +99,11 @@ def extract_kg(text="", model_name=MODEL_LIST[0], model=None):
         raise gr.Error("⚠️ Both text and model must be provided!")
     if not model:
         raise gr.Error("⚠️ Model must be provided!")
-
+
     try:
         start_time = time.time()
         result = model.extract(text, model_name)
-
+
         end_time = time.time()
         duration = end_time - start_time
         logging.info(f"Response time: {duration:.4f} seconds")
@@ -342,7 +346,8 @@ EXAMPLES = [
     [handle_text(text_fr)],
     [handle_text(text2_en)],
     [handle_text(text_es)],
-    [handle_text(text3_en)]
+    [handle_text(text3_en)],
+    [handle_text(text_it)],
 ]
 
 def generate_first_example():
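CLAUDE.md's new notes say the first example is cached as a pickle file under `cache/`, and this diff shows `CACHE_DIR` being created at startup. A minimal sketch of that kind of pickle cache, assuming a simple key-to-filename scheme (the repo's actual keying may differ; `cached_extract` is a hypothetical helper):

```python
import os
import pickle

CACHE_DIR = "cache"  # matches the directory created in app.py

def cached_extract(key, compute):
    """Hypothetical sketch of the pickle-based caching described in
    CLAUDE.md; returns a cached result if present, else computes,
    stores, and returns it."""
    path = os.path.join(CACHE_DIR, f"{key}.pkl")
    if os.path.exists(path):
        with open(path, "rb") as f:
            return pickle.load(f)
    result = compute()
    os.makedirs(CACHE_DIR, exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(result, f)
    return result

# Illustrative usage; model.extract is the call shown in the diff above:
# result = cached_extract("first_example",
#                         lambda: model.extract(text, model_name))
```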
data/sample_it.txt ADDED

@@ -0,0 +1 @@
+Ieri sera, passeggiando lungo la riva del lago di Como, ho assistito a uno dei tramonti più suggestivi della mia vita. Il cielo, dipinto di sfumature che variavano dall'arancio acceso al viola intenso, si rifletteva sull'acqua calma come una tela perfetta. Le montagne circostanti, avvolte da una leggera foschia, sembravano fondersi con le nuvole, creando un paesaggio che sembrava uscito da un quadro impressionista. Alcune barche a vela, silenziose e lente, solcavano le acque lasciando dietro di sé una scia dorata, mentre il profumo di gelsomino e di terra bagnata si mescolava all'aria fresca della sera. Per un momento, il tempo sembrava essersi fermato, e anche il chiacchiericcio dei pochi turisti presenti si era attenuato, come se tutti fossero rapiti da quella magia effimera. È in istanti come questi che ci si rende conto di quanto la natura, con la sua semplicità, sappia regalarci emozioni che né la tecnologia né la frenesia quotidiana potranno mai eguagliare.
llm_graph.py CHANGED

@@ -1,5 +1,6 @@
 import os
 import time
+# import shutil
 
 import numpy as np
 import networkx as nx
@@ -136,7 +137,7 @@ class LLMGraph:
         """
         Extract knowledge graph in structured format from text.
         """
-
+
         if model_name == MODEL_LIST[0]:
             # Use Hugging Face Inference API with Phi-3-mini-128k-instruct-graph
             messages = self._get_messages(text)
@@ -146,7 +147,7 @@ class LLMGraph:
         else:
             # Use LightRAG with Azure OpenAI
             self.rag.insert(text)  # Insert the text into the RAG storage
-
+
             # Wait for GRAPHML_FILE to be created
             while not os.path.exists(GRAPHML_FILE):
                 time.sleep(0.1)  # Sleep for 0.1 seconds before checking again
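The unchanged context above shows extract() busy-waiting until LightRAG writes `GRAPHML_FILE`, which would spin forever if the insert fails. A defensive variant that bounds the wait, as a sketch rather than the repo's code (`wait_for_file` is a hypothetical helper):

```python
import os
import time

def wait_for_file(path, timeout=60.0, poll_interval=0.1):
    """Poll until `path` exists, raising instead of hanging forever."""
    deadline = time.monotonic() + timeout
    while not os.path.exists(path):
        if time.monotonic() >= deadline:
            raise TimeoutError(f"{path} was not created within {timeout:.0f}s")
        time.sleep(poll_interval)

# Usage in place of the bare while-loop, then load the generated graph
# with networkx (imported in this file):
# wait_for_file(GRAPHML_FILE)
# graph = nx.read_graphml(GRAPHML_FILE)
```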