Added Italian text

Files changed:
- CLAUDE.md (+13 -2)
- app.py (+8 -3)
- data/sample_it.txt (+1 -0)
- llm_graph.py (+3 -2)
CLAUDE.md
CHANGED

@@ -40,7 +40,7 @@ AZURE_EMBEDDING_DEPLOYMENT=<embedding_deployment>
 AZURE_EMBEDDING_API_VERSION=<embedding_api_version>
 ```
 
-##
+## Development Commands
 
 ```bash
 # Install dependencies
@@ -48,6 +48,9 @@ pip install -r requirements.txt
 
 # Run the Gradio app
 python app.py
+
+# Test model extraction directly
+python llm_graph.py
 ```
 
 ## Key Dependencies
@@ -76,4 +79,12 @@ The application expects JSON output with this schema:
   "nodes": [{"id": "entity", "type": "broad_type", "detailed_type": "specific_type"}],
   "edges": [{"from": "entity1", "to": "entity2", "label": "relationship"}]
 }
-```
+```
+
+## Testing and Development Notes
+
+- No formal test suite exists; manual testing through the Gradio interface
+- First example is automatically cached for performance on startup
+- Cache files stored in `cache/` directory as pickle files
+- Working directory `sample/` is cleared and recreated on each run
+- GraphML files generated by LightRAG for Azure OpenAI model backend
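The JSON schema documented above maps directly onto a directed graph, and the repo already imports networkx in llm_graph.py. A minimal sketch of loading one extraction result into a graph; the helper name `json_to_graph` and the sample payload are illustrative, not code from this repo:

```python
import json
import networkx as nx

def json_to_graph(payload: str) -> nx.DiGraph:
    """Build a directed graph from extraction JSON in the schema above."""
    data = json.loads(payload)
    g = nx.DiGraph()
    for node in data["nodes"]:
        # "type" is the broad category, "detailed_type" the specific one
        g.add_node(node["id"], type=node["type"], detailed_type=node["detailed_type"])
    for edge in data["edges"]:
        g.add_edge(edge["from"], edge["to"], label=edge["label"])
    return g

# Hypothetical payload following the documented schema
sample = json.dumps({
    "nodes": [{"id": "lago di Como", "type": "location", "detailed_type": "lake"}],
    "edges": [],
})
print(json_to_graph(sample).nodes(data=True))
```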
app.py
CHANGED

@@ -59,6 +59,10 @@ text_es_file = "./data/sample_es.txt"
 with open(text_es_file, 'r', encoding='utf-8') as file:
     text_es = file.read()
 
+text_it_file = "./data/sample_it.txt"
+with open(text_it_file, 'r', encoding='utf-8') as file:
+    text_it = file.read()
+
 # Create cache directory if it doesn't exist
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.makedirs(WORKING_DIR, exist_ok=True)
@@ -95,11 +99,11 @@ def extract_kg(text="", model_name=MODEL_LIST[0], model=None):
         raise gr.Error("⚠️ Both text and model must be provided!")
     if not model:
         raise gr.Error("⚠️ Model must be provided!")
-
+
     try:
         start_time = time.time()
         result = model.extract(text, model_name)
-
+
         end_time = time.time()
         duration = end_time - start_time
         logging.info(f"Response time: {duration:.4f} seconds")
@@ -342,7 +346,8 @@ EXAMPLES = [
     [handle_text(text_fr)],
     [handle_text(text2_en)],
     [handle_text(text_es)],
-    [handle_text(text3_en)]
+    [handle_text(text3_en)],
+    [handle_text(text_it)],
 ]
 
 def generate_first_example():
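As the first hunk shows, each new language costs three near-identical loader lines plus an EXAMPLES entry. A hedged sketch of how the loaders could be consolidated; the `SAMPLES` dict and the loop are a hypothetical refactor, not code from this repo:

```python
from pathlib import Path

# Hypothetical refactor: one dict keyed by language code replaces the
# per-language variables (text_es, text_it, ...) added one by one above.
SAMPLES = {
    code: Path(f"./data/sample_{code}.txt").read_text(encoding="utf-8")
    for code in ("en", "fr", "es", "it")
    if Path(f"./data/sample_{code}.txt").exists()
}
```

With something like this in place, adding a language would mean dropping a new sample file into `data/` rather than editing app.py in three places.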
data/sample_it.txt
ADDED

@@ -0,0 +1 @@
+Ieri sera, passeggiando lungo la riva del lago di Como, ho assistito a uno dei tramonti più suggestivi della mia vita. Il cielo, dipinto di sfumature che variavano dall’arancio acceso al viola intenso, si rifletteva sull’acqua calma come una tela perfetta. Le montagne circostanti, avvolte da una leggera foschia, sembravano fondersi con le nuvole, creando un paesaggio che sembrava uscito da un quadro impressionista. Alcune barche a vela, silenziose e lente, solcavano le acque lasciando dietro di sé una scia dorata, mentre il profumo di gelsomino e di terra bagnata si mescolava all’aria fresca della sera. Per un momento, il tempo sembrava essersi fermato, e anche il chiacchiericcio dei pochi turisti presenti si era attenuato, come se tutti fossero rapiti da quella magia effimera. È in istanti come questi che ci si rende conto di quanto la natura, con la sua semplicità, sappia regalarci emozioni che né la tecnologia né la frenesia quotidiana potranno mai eguagliare.
llm_graph.py
CHANGED

@@ -1,5 +1,6 @@
 import os
 import time
+# import shutil
 
 import numpy as np
 import networkx as nx
@@ -136,7 +137,7 @@ class LLMGraph:
         """
         Extract knowledge graph in structured format from text.
         """
-
+
         if model_name == MODEL_LIST[0]:
             # Use Hugging Face Inference API with Phi-3-mini-128k-instruct-graph
             messages = self._get_messages(text)
@@ -146,7 +147,7 @@ class LLMGraph:
         else:
             # Use LightRAG with Azure OpenAI
             self.rag.insert(text)  # Insert the text into the RAG storage
-
+
             # Wait for GRAPHML_FILE to be created
             while not os.path.exists(GRAPHML_FILE):
                 time.sleep(0.1)  # Sleep for 0.1 seconds before checking again
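The polling loop in the last hunk blocks forever if LightRAG never writes the file. A minimal sketch of a timeout-guarded variant; the `wait_for_graphml` helper is illustrative (only `GRAPHML_FILE` and the networkx dependency come from the repo):

```python
import os
import time

import networkx as nx

def wait_for_graphml(path: str, timeout: float = 30.0, poll: float = 0.1) -> nx.Graph:
    """Poll for the GraphML file LightRAG writes, with an upper bound on the wait."""
    deadline = time.monotonic() + timeout
    while not os.path.exists(path):
        if time.monotonic() > deadline:
            raise TimeoutError(f"GraphML file not created within {timeout}s: {path}")
        time.sleep(poll)
    return nx.read_graphml(path)
```

An unbounded wait is tolerable for a demo Space, but a timeout turns a hung request into a visible error.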