Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ from huggingface_hub import hf_hub_download, HfApi
|
|
| 5 |
import os
|
| 6 |
import time
|
| 7 |
import json
|
|
|
|
| 8 |
|
| 9 |
# ===== CONFIGURATION =====
|
| 10 |
TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
|
|
@@ -14,16 +15,61 @@ PROGRESS_FILENAME = "indexing_progress.json"
|
|
| 14 |
CONCEPTNET_BASE = "http://conceptnet.io"
|
| 15 |
# =========================
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
print(f"π Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
|
|
|
|
| 18 |
|
| 19 |
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
|
| 20 |
|
| 21 |
def log_progress(message, level="INFO"):
    """Print ``message`` to stdout with an ``HH:MM:SS`` timestamp and a level marker.

    Args:
        message: Text to print.
        level: One of ``INFO``, ``SUCCESS``, ``ERROR``, ``WARN`` or ``DEBUG``.
            Any other value is printed with an empty marker.
    """
    # The markers are written as escapes rather than literal emoji: the
    # literals had been mis-decoded, which split the SUCCESS string across
    # two lines and made the dictionary unparsable.
    # NOTE(review): the ERROR and DEBUG glyphs could not be recovered from the
    # damaged text; cross mark / magnifying glass are assumed - confirm.
    prefixes = {
        "INFO": "\u2139\ufe0f ",      # information sign
        "SUCCESS": "\u2705",          # white heavy check mark
        "ERROR": "\u274c",            # cross mark
        "WARN": "\u26a0\ufe0f ",      # warning sign
        "DEBUG": "\U0001f50d",        # magnifying glass
    }
    timestamp = time.strftime("%H:%M:%S")
    prefix = prefixes.get(level, "")
    print(f"[{timestamp}] {prefix} {message}")
|
| 25 |
|
| 26 |
def check_remote_progress():
|
|
|
|
| 27 |
if not HF_TOKEN:
|
| 28 |
return {"indexing_complete": False}
|
| 29 |
try:
|
|
@@ -36,49 +82,59 @@ def check_remote_progress():
|
|
| 36 |
return {"indexing_complete": False}
|
| 37 |
|
| 38 |
def create_indexed_database():
    """Return the local path of the pre-indexed database, or ``None``.

    The file is fetched with ``hf_hub_download`` only when
    ``check_remote_progress()`` reports ``indexing_complete``.

    Returns:
        The value returned by ``hf_hub_download`` on success, otherwise
        ``None`` (indexing not complete, or the download failed).
    """
    progress = check_remote_progress()
    if not progress.get("indexing_complete"):
        return None

    try:
        return hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=INDEXED_DB_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception as e:
        # Still best-effort (the caller receives None), but the reason is now
        # logged. The previous bare ``except: pass`` hid every failure and
        # also trapped KeyboardInterrupt / SystemExit.
        log_progress(f"Failed to download indexed DB: {e}", "ERROR")
        return None
|
| 46 |
|
| 47 |
DB_PATH = create_indexed_database()
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
def get_db_connection():
    """Open a new SQLite connection to ``DB_PATH`` with an enlarged page cache.

    Returns:
        sqlite3.Connection: opened with ``check_same_thread=False``. The
        caller owns the connection; ``with conn:`` only commits or rolls
        back, it does not close it.

    Raises:
        RuntimeError: if ``DB_PATH`` is empty or ``None``.
        sqlite3.Error: if the database cannot be opened or configured.
    """
    # DB_PATH is None when the download step did not produce a file; fail with
    # a clear message instead of the TypeError sqlite3.connect(None) raises.
    if not DB_PATH:
        raise RuntimeError("Database path is not set. Cannot create connection.")

    conn = sqlite3.connect(DB_PATH, check_same_thread=False)
    try:
        # A negative cache_size is a size in KiB rather than a page count.
        conn.execute("PRAGMA cache_size = -256000")
    except sqlite3.Error:
        conn.close()  # do not leak the handle if configuration fails
        raise
    return conn
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
| 58 |
log_progress(f"Profile: {word} ({lang})", "INFO")
|
| 59 |
-
progress(0, desc="Starting...")
|
| 60 |
|
| 61 |
if not word or lang not in TARGET_LANGUAGES:
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
word = word.strip().lower().replace(' ', '_')
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
# FIXED: Use FULL URLs for relations!
|
| 68 |
-
relations = [
|
| 69 |
-
("IsA", f"{CONCEPTNET_BASE}/r/IsA"),
|
| 70 |
-
("PartOf", f"{CONCEPTNET_BASE}/r/PartOf"),
|
| 71 |
-
("HasA", f"{CONCEPTNET_BASE}/r/HasA"),
|
| 72 |
-
("UsedFor", f"{CONCEPTNET_BASE}/r/UsedFor"),
|
| 73 |
-
("CapableOf", f"{CONCEPTNET_BASE}/r/CapableOf"),
|
| 74 |
-
("Causes", f"{CONCEPTNET_BASE}/r/Causes"),
|
| 75 |
-
("HasProperty", f"{CONCEPTNET_BASE}/r/HasProperty"),
|
| 76 |
-
("Synonym", f"{CONCEPTNET_BASE}/r/Synonym"),
|
| 77 |
-
("Antonym", f"{CONCEPTNET_BASE}/r/Antonym"),
|
| 78 |
-
("AtLocation", f"{CONCEPTNET_BASE}/r/AtLocation"),
|
| 79 |
-
("RelatedTo", f"{CONCEPTNET_BASE}/r/RelatedTo"),
|
| 80 |
-
("DerivedFrom", f"{CONCEPTNET_BASE}/r/DerivedFrom"),
|
| 81 |
-
]
|
| 82 |
|
| 83 |
output_md = f"# π§ Semantic Profile: '{word}' ({lang.upper()})\n\n"
|
| 84 |
|
|
@@ -86,95 +142,128 @@ def get_semantic_profile(word, lang='en', progress=gr.Progress()):
|
|
| 86 |
with get_db_connection() as conn:
|
| 87 |
cursor = conn.cursor()
|
| 88 |
|
| 89 |
-
progress(0
|
| 90 |
-
|
| 91 |
-
# Find nodes
|
| 92 |
-
cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
|
| 93 |
-
nodes = cursor.fetchall()
|
| 94 |
-
|
| 95 |
-
if not nodes:
|
| 96 |
-
return f"# π§ '{word}'\n\nβ οΈ Not found"
|
| 97 |
|
| 98 |
-
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
for node_id, label in nodes[:3]:
|
| 101 |
output_md += f"**Node:** `{node_id}` β **{label}**\n"
|
| 102 |
output_md += "\n"
|
| 103 |
|
|
|
|
|
|
|
| 104 |
total = 0
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
-
|
| 110 |
found = False
|
| 111 |
|
| 112 |
-
# Outgoing
|
| 113 |
-
|
| 114 |
-
|
| 115 |
SELECT en.label, e.weight
|
| 116 |
FROM edge e
|
| 117 |
JOIN node en ON e.end_id = en.id
|
| 118 |
-
WHERE e.start_id
|
| 119 |
ORDER BY e.weight DESC
|
| 120 |
LIMIT 7
|
| 121 |
-
"""
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
elapsed = time.time() -
|
| 125 |
-
|
| 126 |
-
log_progress(f" {rel_name} outgoing: {len(results)} in {elapsed:.3f}s", "DEBUG")
|
| 127 |
|
| 128 |
-
for label, weight in
|
| 129 |
-
|
| 130 |
found = True
|
| 131 |
total += 1
|
| 132 |
|
| 133 |
# Incoming
|
| 134 |
-
|
| 135 |
-
|
| 136 |
SELECT s.label, e.weight
|
| 137 |
FROM edge e
|
| 138 |
JOIN node s ON e.start_id = s.id
|
| 139 |
-
WHERE e.end_id
|
| 140 |
ORDER BY e.weight DESC
|
| 141 |
LIMIT 7
|
| 142 |
-
"""
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
elapsed = time.time() -
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
output_md += f"- *{label}* {rel_name} β **{word}** `[{weight:.3f}]`\n"
|
| 151 |
found = True
|
| 152 |
total += 1
|
| 153 |
|
| 154 |
if not found:
|
| 155 |
-
|
| 156 |
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
|
|
|
|
|
|
| 161 |
output_md += f"---\n**Total relations:** {total}\n"
|
| 162 |
-
|
| 163 |
log_progress(f"Profile complete: {total} relations", "SUCCESS")
|
| 164 |
-
|
| 165 |
-
|
| 166 |
|
| 167 |
except Exception as e:
|
| 168 |
log_progress(f"Error: {e}", "ERROR")
|
| 169 |
import traceback
|
| 170 |
traceback.print_exc()
|
| 171 |
-
|
| 172 |
|
| 173 |
def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
|
| 174 |
-
"""Query builder - FIXED
|
| 175 |
log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO")
|
| 176 |
progress(0, desc="Building...")
|
| 177 |
|
|
|
|
|
|
|
|
|
|
| 178 |
query = """
|
| 179 |
SELECT
|
| 180 |
e.id, s.id, r.label, en.id, e.weight, s.label, en.label
|
|
@@ -196,29 +285,25 @@ def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
|
|
| 196 |
if start_node.startswith('http://'):
|
| 197 |
pattern = f"{start_node}%"
|
| 198 |
else:
|
| 199 |
-
pattern = f"{CONCEPTNET_BASE}/c/en/{start_node}%"
|
| 200 |
query += " AND s.id LIKE ?"
|
| 201 |
params.append(pattern)
|
| 202 |
|
| 203 |
-
# Relation -
|
| 204 |
if relation and relation.strip():
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
rel_value
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
query += " AND r.id = ?"
|
| 213 |
-
params.append(rel_value)
|
| 214 |
-
log_progress(f"Relation: {rel_value}", "DEBUG")
|
| 215 |
|
| 216 |
# End node
|
| 217 |
if end_node and end_node.strip():
|
| 218 |
if end_node.startswith('http://'):
|
| 219 |
pattern = f"{end_node}%"
|
| 220 |
else:
|
| 221 |
-
pattern = f"{CONCEPTNET_BASE}/c/en/{end_node}%"
|
| 222 |
query += " AND en.id LIKE ?"
|
| 223 |
params.append(pattern)
|
| 224 |
|
|
@@ -248,8 +333,13 @@ def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
|
|
| 248 |
return pd.DataFrame(), f"β {e}"
|
| 249 |
|
| 250 |
def run_raw_query(sql_query):
|
|
|
|
| 251 |
if not sql_query.strip().upper().startswith("SELECT"):
|
| 252 |
-
return pd.DataFrame(), "β Only SELECT"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
try:
|
| 254 |
with get_db_connection() as conn:
|
| 255 |
start = time.time()
|
|
@@ -260,109 +350,156 @@ def run_raw_query(sql_query):
|
|
| 260 |
return pd.DataFrame(), f"β {e}"
|
| 261 |
|
| 262 |
def get_schema_info():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
md = f"# π Schema\n\n"
|
| 264 |
md += f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n\n"
|
| 265 |
-
md += "
|
| 266 |
-
md += "β
**Relations use FULL URLs:** `http://conceptnet.io/r/IsA` (not `/r/IsA`)\n\n"
|
| 267 |
-
md += "β
**Nodes use FULL URLs:** `http://conceptnet.io/c/en/dog` (not `/c/en/dog`)\n\n"
|
| 268 |
|
| 269 |
try:
|
| 270 |
with get_db_connection() as conn:
|
| 271 |
cursor = conn.cursor()
|
| 272 |
|
| 273 |
-
md += "##
|
| 274 |
-
cursor.execute("SELECT id, label FROM relation ORDER BY label LIMIT 20")
|
| 275 |
-
for rel_id, label in cursor.fetchall():
|
| 276 |
-
md += f"- **{label}:** `{rel_id}`\n"
|
| 277 |
-
|
| 278 |
-
md += "\n## Tables\n\n"
|
| 279 |
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
|
| 280 |
-
|
|
|
|
| 281 |
cursor.execute(f"SELECT COUNT(*) FROM {table}")
|
| 282 |
md += f"- **{table}:** {cursor.fetchone()[0]:,} rows\n"
|
| 283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
except Exception as e:
|
| 285 |
-
md += f"\
|
| 286 |
|
| 287 |
return md
|
| 288 |
|
| 289 |
-
# UI
|
| 290 |
with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo:
|
| 291 |
gr.Markdown("# π§ ConceptNet Explorer")
|
| 292 |
-
gr.Markdown(f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} |
|
| 293 |
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
)
|
| 310 |
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
-
gr.
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
rel_input = gr.Textbox(label="Relation", placeholder="IsA", value="IsA")
|
| 330 |
-
end_input = gr.Textbox(label="End Node", placeholder="")
|
| 331 |
-
|
| 332 |
-
limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=50)
|
| 333 |
-
query_btn = gr.Button("βΆοΈ Run Query", variant="primary", size="lg")
|
| 334 |
-
|
| 335 |
-
status_output = gr.Markdown()
|
| 336 |
-
results_output = gr.DataFrame(wrap=True)
|
| 337 |
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
value=f"SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id WHERE e.start_id = '{CONCEPTNET_BASE}/c/en/dog' LIMIT 10",
|
| 344 |
-
lines=3
|
| 345 |
-
)
|
| 346 |
-
|
| 347 |
-
raw_btn = gr.Button("βΆοΈ Execute")
|
| 348 |
-
raw_status = gr.Markdown()
|
| 349 |
-
raw_results = gr.DataFrame()
|
| 350 |
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
gr.Markdown(
|
| 356 |
-
"---\n"
|
| 357 |
-
"**Fixed:** Relations now use full URLs (`http://conceptnet.io/r/IsA`) | "
|
| 358 |
-
"**Performance:** Exact match on rel_id = fast queries!"
|
| 359 |
-
)
|
| 360 |
-
|
| 361 |
-
semantic_btn.click(get_semantic_profile, [word_input, lang_input], semantic_output)
|
| 362 |
-
query_btn.click(run_query, [start_input, rel_input, end_input, limit_slider], [results_output, status_output])
|
| 363 |
-
raw_btn.click(run_raw_query, raw_sql_input, [raw_results, raw_status])
|
| 364 |
-
schema_btn.click(get_schema_info, None, schema_output)
|
| 365 |
|
| 366 |
if __name__ == "__main__":
|
| 367 |
-
|
|
|
|
|
|
|
|
|
|
| 368 |
demo.launch(ssr_mode=False)
|
|
|
|
| 5 |
import os
|
| 6 |
import time
|
| 7 |
import json
|
| 8 |
+
from typing import Dict, List, Optional
|
| 9 |
|
| 10 |
# ===== CONFIGURATION =====
|
| 11 |
TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
|
|
|
|
| 15 |
CONCEPTNET_BASE = "http://conceptnet.io"
|
| 16 |
# =========================
|
| 17 |
|
| 18 |
+
# --- All relations MUST be full URLs ---
|
| 19 |
+
# Relation names in display order; each one maps to
# "<CONCEPTNET_BASE>/r/<name>" in CONCEPTNET_RELATIONS below.
_RELATION_NAMES = (
    "RelatedTo",
    "IsA",
    "PartOf",
    "HasA",
    "UsedFor",
    "CapableOf",
    "AtLocation",
    "Causes",
    "HasSubevent",
    "HasFirstSubevent",
    "HasLastSubevent",
    "HasPrerequisite",
    "HasProperty",
    "MotivatedByGoal",
    "ObstructedBy",
    "Desires",
    "CreatedBy",
    "Synonym",
    "Antonym",
    "DistinctFrom",
    "DerivedFrom",
    "SymbolOf",
    "DefinedAs",
    "MannerOf",
    "LocatedNear",
    "HasContext",
    "SimilarTo",
    "EtymologicallyRelatedTo",
    "EtymologicallyDerivedFrom",
    "CausesDesire",
    "MadeOf",
    "ReceivesAction",
    "ExternalURL",
    "NotDesires",
    "NotUsedFor",
    "NotCapableOf",
    "NotHasProperty",
)

# Short relation name -> full relation URL, in the order listed above.
CONCEPTNET_RELATIONS: Dict[str, str] = {
    relation_name: f"{CONCEPTNET_BASE}/r/{relation_name}"
    for relation_name in _RELATION_NAMES
}
|
| 58 |
+
# =========================
|
| 59 |
+
|
| 60 |
print(f"π Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
|
| 61 |
+
print(f"π Relations: {len(CONCEPTNET_RELATIONS)} relations loaded")
|
| 62 |
|
| 63 |
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
|
| 64 |
|
| 65 |
def log_progress(message, level="INFO"):
    """Print ``message`` to stdout with an ``HH:MM:SS`` timestamp and a level marker.

    Args:
        message: Text to print.
        level: One of ``INFO``, ``SUCCESS``, ``ERROR``, ``WARN`` or ``DEBUG``.
            Any other value is printed with an empty marker.
    """
    # The markers are written as escapes rather than literal emoji: the
    # literals had been mis-decoded, which split the SUCCESS string across
    # two lines and made the dictionary unparsable.
    # NOTE(review): the ERROR and DEBUG glyphs could not be recovered from the
    # damaged text; cross mark / magnifying glass are assumed - confirm.
    prefixes = {
        "INFO": "\u2139\ufe0f ",      # information sign
        "SUCCESS": "\u2705",          # white heavy check mark
        "ERROR": "\u274c",            # cross mark
        "WARN": "\u26a0\ufe0f ",      # warning sign
        "DEBUG": "\U0001f50d",        # magnifying glass
    }
    timestamp = time.strftime("%H:%M:%S")
    prefix = prefixes.get(level, "")
    print(f"[{timestamp}] {prefix} {message}")
|
| 70 |
|
| 71 |
def check_remote_progress():
|
| 72 |
+
"""Check Hugging Face Hub for indexing progress."""
|
| 73 |
if not HF_TOKEN:
|
| 74 |
return {"indexing_complete": False}
|
| 75 |
try:
|
|
|
|
| 82 |
return {"indexing_complete": False}
|
| 83 |
|
| 84 |
def create_indexed_database():
    """Fetch the pre-indexed database from the Hub once remote indexing is done.

    Returns:
        The value returned by ``hf_hub_download`` on success, otherwise
        ``None`` (indexing not reported complete, or the download raised).
    """
    remote_state = check_remote_progress()

    if remote_state.get("indexing_complete"):
        log_progress("Remote indexing is complete. Downloading DB...", "INFO")
        try:
            return hf_hub_download(
                repo_id=INDEXED_REPO_ID,
                filename=INDEXED_DB_FILENAME,
                repo_type="dataset",
                token=HF_TOKEN,
            )
        except Exception as download_error:
            # Logged, then execution falls through to the warning below.
            log_progress(f"Failed to download indexed DB: {download_error}", "ERROR")

    log_progress("Remote indexing not complete or DB not found.", "WARN")
    return None
|
| 96 |
|
| 97 |
DB_PATH = create_indexed_database()
|
| 98 |
|
| 99 |
+
if not DB_PATH:
|
| 100 |
+
log_progress("DATABASE NOT FOUND. App will not function.", "ERROR")
|
| 101 |
+
else:
|
| 102 |
+
log_progress(f"Database loaded from: {DB_PATH}", "SUCCESS")
|
| 103 |
+
|
| 104 |
def get_db_connection():
    """Open a new, configured SQLite connection to ``DB_PATH``.

    The connection is created with ``check_same_thread=False``, which only
    disables sqlite3's same-thread check; it does not add any locking.

    Returns:
        sqlite3.Connection: the caller owns it. ``with conn:`` only commits
        or rolls back, it does not close the connection.

    Raises:
        RuntimeError: if ``DB_PATH`` is empty or ``None``.
        sqlite3.Error: if the database cannot be opened or configured.
    """
    if not DB_PATH:
        # RuntimeError is still caught by the callers' ``except Exception``.
        raise RuntimeError("Database path is not set. Cannot create connection.")

    conn = sqlite3.connect(DB_PATH, check_same_thread=False)
    try:
        # A negative cache_size is a size in KiB rather than a page count.
        conn.execute("PRAGMA cache_size = -256000")
        conn.execute("PRAGMA temp_store = MEMORY")
    except sqlite3.Error:
        conn.close()  # do not leak the handle if configuration fails
        raise
    return conn
|
| 112 |
|
| 113 |
+
def get_semantic_profile(word: str, lang: str = 'en', selected_relations: List[str] = None, progress=gr.Progress()):
|
| 114 |
+
"""
|
| 115 |
+
Get semantic profile for a word.
|
| 116 |
+
FIX: This is now a generator to stream updates and show progress.
|
| 117 |
+
FIX: Uses exact node match first for performance.
|
| 118 |
+
"""
|
| 119 |
log_progress(f"Profile: {word} ({lang})", "INFO")
|
|
|
|
| 120 |
|
| 121 |
if not word or lang not in TARGET_LANGUAGES:
|
| 122 |
+
yield "β οΈ Invalid input"
|
| 123 |
+
return
|
| 124 |
+
|
| 125 |
+
if not DB_PATH:
|
| 126 |
+
yield "β **Error:** Database file not found."
|
| 127 |
+
return
|
| 128 |
+
|
| 129 |
+
# Set default relations if none are selected
|
| 130 |
+
if not selected_relations:
|
| 131 |
+
selected_relations = [
|
| 132 |
+
"IsA", "RelatedTo", "PartOf", "HasA", "UsedFor",
|
| 133 |
+
"CapableOf", "Synonym", "Antonym"
|
| 134 |
+
]
|
| 135 |
+
|
| 136 |
word = word.strip().lower().replace(' ', '_')
|
| 137 |
+
exact_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
output_md = f"# π§ Semantic Profile: '{word}' ({lang.upper()})\n\n"
|
| 140 |
|
|
|
|
| 142 |
with get_db_connection() as conn:
|
| 143 |
cursor = conn.cursor()
|
| 144 |
|
| 145 |
+
progress(0, desc="Starting...")
|
| 146 |
+
yield output_md
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
+
progress(0.05, desc="Finding nodes...")
|
| 149 |
|
| 150 |
+
# --- PERFORMANCE FIX: Try exact match first ---
|
| 151 |
+
cursor.execute("SELECT id, label FROM node WHERE id = ?", (exact_path,))
|
| 152 |
+
exact_node = cursor.fetchone()
|
| 153 |
+
|
| 154 |
+
query_path = None
|
| 155 |
+
query_operator = ""
|
| 156 |
+
nodes = []
|
| 157 |
+
|
| 158 |
+
if exact_node:
|
| 159 |
+
log_progress(f"Found exact node: {exact_node[0]}", "SUCCESS")
|
| 160 |
+
nodes = [exact_node]
|
| 161 |
+
query_path = exact_path
|
| 162 |
+
query_operator = "=" # Use fast exact match
|
| 163 |
+
else:
|
| 164 |
+
log_progress(f"No exact node, falling back to LIKE...", "WARN")
|
| 165 |
+
like_path = f"{exact_path}%"
|
| 166 |
+
cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
|
| 167 |
+
nodes = cursor.fetchall()
|
| 168 |
+
if not nodes:
|
| 169 |
+
yield f"# π§ '{word}'\n\nβ οΈ Not found"
|
| 170 |
+
return
|
| 171 |
+
query_path = like_path # Use slower LIKE match
|
| 172 |
+
query_operator = "LIKE"
|
| 173 |
+
|
| 174 |
+
log_progress(f"Using path: {query_path} (op: {query_operator})", "INFO")
|
| 175 |
+
|
| 176 |
for node_id, label in nodes[:3]:
|
| 177 |
output_md += f"**Node:** `{node_id}` β **{label}**\n"
|
| 178 |
output_md += "\n"
|
| 179 |
|
| 180 |
+
yield output_md # Yield after finding nodes
|
| 181 |
+
|
| 182 |
total = 0
|
| 183 |
|
| 184 |
+
# Filter relations to query
|
| 185 |
+
relations_to_query = [(name, CONCEPTNET_RELATIONS[name]) for name in selected_relations if name in CONCEPTNET_RELATIONS]
|
| 186 |
+
num_relations = len(relations_to_query)
|
| 187 |
+
|
| 188 |
+
if num_relations == 0:
|
| 189 |
+
output_md += "β οΈ No relations selected."
|
| 190 |
+
yield output_md
|
| 191 |
+
return
|
| 192 |
+
|
| 193 |
+
for i, (rel_name, rel_url) in enumerate(relations_to_query):
|
| 194 |
+
progress((i + 0.1) / num_relations, desc=f"π {rel_name}...")
|
| 195 |
|
| 196 |
+
rel_md = f"## {rel_name}\n\n"
|
| 197 |
found = False
|
| 198 |
|
| 199 |
+
# Outgoing
|
| 200 |
+
start_time = time.time()
|
| 201 |
+
sql_out = f"""
|
| 202 |
SELECT en.label, e.weight
|
| 203 |
FROM edge e
|
| 204 |
JOIN node en ON e.end_id = en.id
|
| 205 |
+
WHERE e.start_id {query_operator} ? AND e.rel_id = ?
|
| 206 |
ORDER BY e.weight DESC
|
| 207 |
LIMIT 7
|
| 208 |
+
"""
|
| 209 |
+
cursor.execute(sql_out, (query_path, rel_url))
|
| 210 |
+
results_out = cursor.fetchall()
|
| 211 |
+
elapsed = time.time() - start_time
|
| 212 |
+
log_progress(f" {rel_name} outgoing: {len(results_out)} in {elapsed:.3f}s", "DEBUG")
|
|
|
|
| 213 |
|
| 214 |
+
for label, weight in results_out:
|
| 215 |
+
rel_md += f"- **{word}** {rel_name} β *{label}* `[{weight:.3f}]`\n"
|
| 216 |
found = True
|
| 217 |
total += 1
|
| 218 |
|
| 219 |
# Incoming
|
| 220 |
+
start_time = time.time()
|
| 221 |
+
sql_in = f"""
|
| 222 |
SELECT s.label, e.weight
|
| 223 |
FROM edge e
|
| 224 |
JOIN node s ON e.start_id = s.id
|
| 225 |
+
WHERE e.end_id {query_operator} ? AND e.rel_id = ?
|
| 226 |
ORDER BY e.weight DESC
|
| 227 |
LIMIT 7
|
| 228 |
+
"""
|
| 229 |
+
cursor.execute(sql_in, (query_path, rel_url))
|
| 230 |
+
results_in = cursor.fetchall()
|
| 231 |
+
elapsed = time.time() - start_time
|
| 232 |
+
log_progress(f" {rel_name} incoming: {len(results_in)} in {elapsed:.3f}s", "DEBUG")
|
| 233 |
+
|
| 234 |
+
for label, weight in results_in:
|
| 235 |
+
rel_md += f"- *{label}* {rel_name} β **{word}** `[{weight:.3f}]`\n"
|
|
|
|
| 236 |
found = True
|
| 237 |
total += 1
|
| 238 |
|
| 239 |
if not found:
|
| 240 |
+
rel_md += "*No results*\n"
|
| 241 |
|
| 242 |
+
rel_md += "\n"
|
| 243 |
+
|
| 244 |
+
output_md += rel_md # Add this relation's results to the full markdown
|
| 245 |
+
progress((i + 1) / num_relations, desc=f"β {rel_name}")
|
| 246 |
+
yield output_md # --- PROGRESS FIX: Yield intermediate results ---
|
| 247 |
+
|
| 248 |
output_md += f"---\n**Total relations:** {total}\n"
|
|
|
|
| 249 |
log_progress(f"Profile complete: {total} relations", "SUCCESS")
|
| 250 |
+
progress(1.0, desc="β
Complete!")
|
| 251 |
+
yield output_md # Yield final result
|
| 252 |
|
| 253 |
except Exception as e:
|
| 254 |
log_progress(f"Error: {e}", "ERROR")
|
| 255 |
import traceback
|
| 256 |
traceback.print_exc()
|
| 257 |
+
yield f"**β Error:** {e}"
|
| 258 |
|
| 259 |
def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
|
| 260 |
+
"""Query builder - FIXED to use relation names from dropdown."""
|
| 261 |
log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO")
|
| 262 |
progress(0, desc="Building...")
|
| 263 |
|
| 264 |
+
if not DB_PATH:
|
| 265 |
+
return pd.DataFrame(), "β **Error:** Database file not found."
|
| 266 |
+
|
| 267 |
query = """
|
| 268 |
SELECT
|
| 269 |
e.id, s.id, r.label, en.id, e.weight, s.label, en.label
|
|
|
|
| 285 |
if start_node.startswith('http://'):
|
| 286 |
pattern = f"{start_node}%"
|
| 287 |
else:
|
| 288 |
+
pattern = f"{CONCEPTNET_BASE}/c/en/{start_node.strip().lower().replace(' ', '_')}%"
|
| 289 |
query += " AND s.id LIKE ?"
|
| 290 |
params.append(pattern)
|
| 291 |
|
| 292 |
+
# Relation - FIX: Use dictionary
|
| 293 |
if relation and relation.strip():
|
| 294 |
+
rel_value = CONCEPTNET_RELATIONS.get(relation.strip())
|
| 295 |
+
if rel_value:
|
| 296 |
+
query += " AND r.id = ?"
|
| 297 |
+
params.append(rel_value)
|
| 298 |
+
log_progress(f"Relation: {rel_value}", "DEBUG")
|
| 299 |
+
# If relation is blank or invalid, filter is just not added
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
# End node
|
| 302 |
if end_node and end_node.strip():
|
| 303 |
if end_node.startswith('http://'):
|
| 304 |
pattern = f"{end_node}%"
|
| 305 |
else:
|
| 306 |
+
pattern = f"{CONCEPTNET_BASE}/c/en/{end_node.strip().lower().replace(' ', '_')}%"
|
| 307 |
query += " AND en.id LIKE ?"
|
| 308 |
params.append(pattern)
|
| 309 |
|
|
|
|
| 333 |
return pd.DataFrame(), f"β {e}"
|
| 334 |
|
| 335 |
def run_raw_query(sql_query):
|
| 336 |
+
"""Execute a raw SELECT SQL query."""
|
| 337 |
if not sql_query.strip().upper().startswith("SELECT"):
|
| 338 |
+
return pd.DataFrame(), "β Only SELECT queries are allowed."
|
| 339 |
+
|
| 340 |
+
if not DB_PATH:
|
| 341 |
+
return pd.DataFrame(), "β **Error:** Database file not found."
|
| 342 |
+
|
| 343 |
try:
|
| 344 |
with get_db_connection() as conn:
|
| 345 |
start = time.time()
|
|
|
|
| 350 |
return pd.DataFrame(), f"β {e}"
|
| 351 |
|
| 352 |
def get_schema_info():
    """Build a Markdown report of the database schema.

    The report lists every non-internal table with its row count, every index
    that has a stored ``CREATE INDEX`` statement, and up to 20 rows of the
    ``relation`` table ordered by label.

    Returns:
        str: Markdown text. When ``DB_PATH`` is unset a single error line is
        returned. When a query fails, the report built so far is returned
        with an error line appended.
    """
    if not DB_PATH:
        return "β **Error:** Database file not found."

    # Imported locally so the block does not depend on a file-level import.
    from contextlib import closing

    parts = [
        "# π Schema\n\n",
        f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n\n",
        "**Key:** Relations and Nodes use full `http://conceptnet.io/...` URLs.\n\n",
    ]

    try:
        # sqlite3's own context manager only commits/rolls back; closing()
        # makes sure the connection is released when the report is done.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()

            parts.append("## Tables & Row Counts\n\n")
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
            for (table,) in cursor.fetchall():
                # Identifiers cannot be bound as parameters, so quote the
                # name; this keeps unusual table names from breaking the SQL.
                quoted_table = '"' + table.replace('"', '""') + '"'
                cursor.execute(f"SELECT COUNT(*) FROM {quoted_table}")
                parts.append(f"- **{table}:** {cursor.fetchone()[0]:,} rows\n")

            parts.append("\n## Indices\n\n")
            cursor.execute("SELECT name, sql FROM sqlite_master WHERE type='index'")
            for name, sql in cursor.fetchall():
                if sql:  # automatically created indices have no stored SQL
                    parts.append(f"- **{name}:** `{sql}`\n")

            parts.append("\n## Common Relations\n\n")
            cursor.execute("SELECT id, label FROM relation ORDER BY label LIMIT 20")
            for rel_id, label in cursor.fetchall():
                parts.append(f"- **{label}:** `{rel_id}`\n")

    except Exception as e:
        parts.append(f"\n**β Error:** {e}\n")

    return "".join(parts)
|
| 387 |
|
| 388 |
+
# ===== Build Gradio UI =====
|
| 389 |
# Gradio UI: four tabs (semantic profile, query builder, raw SQL, schema) when
# the database is available, otherwise only an error notice.
# NOTE(review): several labels below contain mis-encoded emoji; they are kept
# exactly as found and should be restored from the original UTF-8 source.
# NOTE(review): nesting was reconstructed from statement order because the
# indentation was lost - confirm the layout against the original file.
with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π§ ConceptNet Explorer")
    gr.Markdown(f"**Languages:** {', '.join([lang.upper() for lang in TARGET_LANGUAGES])} | **Relations:** {len(CONCEPTNET_RELATIONS)} types")

    if not DB_PATH:
        gr.Markdown("## β ERROR: DATABASE FILE NOT FOUND")
        gr.Markdown("This app cannot start because the SQLite database file could not be downloaded from Hugging Face Hub. Please check the logs.")

    else:
        with gr.Tabs():
            with gr.TabItem("π Semantic Profile"):
                gr.Markdown("**Explore semantic relations for any word. Progress bar and output will update live.**")

                with gr.Row():
                    word_input = gr.Textbox(
                        label="Word",
                        placeholder="e.g., dog, hund, perro",
                        value="dog",
                        scale=3
                    )
                    lang_input = gr.Dropdown(
                        choices=TARGET_LANGUAGES,
                        value="en",
                        label="Language",
                        scale=1
                    )

                with gr.Accordion("Select Relations (fewer = faster)", open=False):
                    relation_input = gr.CheckboxGroup(
                        choices=list(CONCEPTNET_RELATIONS.keys()),
                        label="Relations to Query",
                        value=["IsA", "RelatedTo", "PartOf", "HasA", "UsedFor", "CapableOf", "Synonym", "Antonym", "AtLocation", "HasProperty"]
                    )

                semantic_btn = gr.Button("π Get Semantic Profile", variant="primary", size="lg")
                semantic_output = gr.Markdown(value="Click the button to get the semantic profile.")

                gr.Examples(
                    examples=[
                        ["dog", "en"],
                        ["hund", "de"],
                        ["perro", "es"],
                        ["chat", "fr"],
                        ["knowledge", "en"],
                    ],
                    inputs=[word_input, lang_input],
                    label="Examples"
                )

            with gr.TabItem("β‘ Query Builder"):
                gr.Markdown("**Build custom relationship queries using dropdowns.**")

                with gr.Row():
                    start_input = gr.Textbox(label="Start Node", placeholder="dog (optional)")
                    rel_input = gr.Dropdown(
                        choices=[""] + list(CONCEPTNET_RELATIONS.keys()),  # "" means "any relation"
                        label="Relation",
                        value="IsA",
                        info="Leave blank to query all relations"
                    )
                    end_input = gr.Textbox(label="End Node", placeholder="(optional)")

                limit_slider = gr.Slider(label="Limit", minimum=1, maximum=500, value=50, step=1)
                query_btn = gr.Button("βΆοΈ Run Query", variant="primary", size="lg")

                status_output = gr.Markdown()
                results_output = gr.DataFrame(wrap=True, height=400)

            with gr.TabItem("π» Raw SQL"):
                gr.Markdown("**Execute custom `SELECT` SQL queries against the database.**")

                # The end node is aliased "en" (as in the profile queries).
                # The previous default aliased both the edge and the end node
                # as "e", so the example query could not run as shipped.
                # NOTE(review): font_family is passed through unchanged;
                # confirm that gr.Textbox accepts it in the installed version.
                raw_sql_input = gr.Textbox(
                    label="SQL Query",
                    value=f"SELECT s.label, r.label, en.label, e.weight\nFROM edge e\nJOIN node s ON e.start_id = s.id\nJOIN node en ON e.end_id = en.id\nJOIN relation r ON e.rel_id = r.id\nWHERE s.id = '{CONCEPTNET_BASE}/c/en/dog'\nAND r.id = '{CONCEPTNET_BASE}/r/IsA'\nORDER BY e.weight DESC\nLIMIT 10",
                    lines=5,
                    font_family="monospace"
                )

                raw_btn = gr.Button("βΆοΈ Execute")
                raw_status = gr.Markdown()
                raw_results = gr.DataFrame(height=400)

            with gr.TabItem("π Schema"):
                gr.Markdown("**View database schema, tables, and indices.**")
                schema_btn = gr.Button("π Load Schema Info")
                schema_output = gr.Markdown()

        # --- Button click handlers ---
        # Kept inside the else-branch: the widgets they reference only exist
        # when the database is available.
        semantic_btn.click(
            get_semantic_profile,
            inputs=[word_input, lang_input, relation_input],
            outputs=semantic_output
        )

        query_btn.click(
            run_query,
            inputs=[start_input, rel_input, end_input, limit_slider],
            outputs=[results_output, status_output]
        )

        raw_btn.click(
            run_raw_query,
            inputs=raw_sql_input,
            outputs=[raw_results, raw_status]
        )

        # Schema info is filled in once when the page loads and again
        # whenever the button is pressed.
        demo.load(get_schema_info, None, schema_output)
        schema_btn.click(get_schema_info, None, schema_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 499 |
|
| 500 |
if __name__ == "__main__":
    # Report whether the database was found, then start the Gradio server
    # in either case.
    startup_message, startup_level = (
        ("APP READY!", "SUCCESS")
        if DB_PATH
        else ("APP LAUNCHING WITH ERRORS (DB NOT FOUND)", "ERROR")
    )
    log_progress(startup_message, startup_level)
    demo.launch(ssr_mode=False)
|