cstr committed on
Commit
26dddb5
·
verified ·
1 Parent(s): f45efe5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +303 -166
app.py CHANGED
@@ -5,6 +5,7 @@ from huggingface_hub import hf_hub_download, HfApi
5
  import os
6
  import time
7
  import json
 
8
 
9
  # ===== CONFIGURATION =====
10
  TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
@@ -14,16 +15,61 @@ PROGRESS_FILENAME = "indexing_progress.json"
14
  CONCEPTNET_BASE = "http://conceptnet.io"
15
  # =========================
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  print(f"🌍 Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
 
18
 
19
  HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
20
 
21
  def log_progress(message, level="INFO"):
 
22
  timestamp = time.strftime("%H:%M:%S")
23
  prefix = {"INFO": "ℹ️ ", "SUCCESS": "βœ…", "ERROR": "❌", "WARN": "⚠️ ", "DEBUG": "πŸ”"}.get(level, "")
24
  print(f"[{timestamp}] {prefix} {message}")
25
 
26
  def check_remote_progress():
 
27
  if not HF_TOKEN:
28
  return {"indexing_complete": False}
29
  try:
@@ -36,49 +82,59 @@ def check_remote_progress():
36
  return {"indexing_complete": False}
37
 
38
  def create_indexed_database():
 
39
  progress = check_remote_progress()
40
  if progress.get("indexing_complete"):
 
41
  try:
42
  return hf_hub_download(repo_id=INDEXED_REPO_ID, filename=INDEXED_DB_FILENAME, repo_type="dataset", token=HF_TOKEN)
43
- except:
 
44
  pass
 
45
  return None
46
 
47
  DB_PATH = create_indexed_database()
48
 
 
 
 
 
 
49
  def get_db_connection():
 
 
 
50
  conn = sqlite3.connect(DB_PATH, check_same_thread=False)
51
- conn.execute("PRAGMA cache_size = -256000")
 
52
  return conn
53
 
54
- log_progress("Database loaded", "SUCCESS")
55
-
56
- def get_semantic_profile(word, lang='en', progress=gr.Progress()):
57
- """FIXED: Use full URL for relations!"""
 
 
58
  log_progress(f"Profile: {word} ({lang})", "INFO")
59
- progress(0, desc="Starting...")
60
 
61
  if not word or lang not in TARGET_LANGUAGES:
62
- return "⚠️ Invalid input"
63
-
 
 
 
 
 
 
 
 
 
 
 
 
64
  word = word.strip().lower().replace(' ', '_')
65
- like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"
66
-
67
- # FIXED: Use FULL URLs for relations!
68
- relations = [
69
- ("IsA", f"{CONCEPTNET_BASE}/r/IsA"),
70
- ("PartOf", f"{CONCEPTNET_BASE}/r/PartOf"),
71
- ("HasA", f"{CONCEPTNET_BASE}/r/HasA"),
72
- ("UsedFor", f"{CONCEPTNET_BASE}/r/UsedFor"),
73
- ("CapableOf", f"{CONCEPTNET_BASE}/r/CapableOf"),
74
- ("Causes", f"{CONCEPTNET_BASE}/r/Causes"),
75
- ("HasProperty", f"{CONCEPTNET_BASE}/r/HasProperty"),
76
- ("Synonym", f"{CONCEPTNET_BASE}/r/Synonym"),
77
- ("Antonym", f"{CONCEPTNET_BASE}/r/Antonym"),
78
- ("AtLocation", f"{CONCEPTNET_BASE}/r/AtLocation"),
79
- ("RelatedTo", f"{CONCEPTNET_BASE}/r/RelatedTo"),
80
- ("DerivedFrom", f"{CONCEPTNET_BASE}/r/DerivedFrom"),
81
- ]
82
 
83
  output_md = f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n"
84
 
@@ -86,95 +142,128 @@ def get_semantic_profile(word, lang='en', progress=gr.Progress()):
86
  with get_db_connection() as conn:
87
  cursor = conn.cursor()
88
 
89
- progress(0.05, desc="Finding nodes...")
90
-
91
- # Find nodes
92
- cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
93
- nodes = cursor.fetchall()
94
-
95
- if not nodes:
96
- return f"# 🧠 '{word}'\n\n⚠️ Not found"
97
 
98
- log_progress(f"Found {len(nodes)} nodes", "SUCCESS")
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  for node_id, label in nodes[:3]:
101
  output_md += f"**Node:** `{node_id}` β†’ **{label}**\n"
102
  output_md += "\n"
103
 
 
 
104
  total = 0
105
 
106
- for i, (rel_name, rel_url) in enumerate(relations):
107
- progress((i + 1) / len(relations), desc=f"Querying {rel_name}...")
 
 
 
 
 
 
 
 
 
108
 
109
- output_md += f"## {rel_name}\n\n"
110
  found = False
111
 
112
- # Outgoing - use exact match on rel_id (FAST!)
113
- start = time.time()
114
- cursor.execute("""
115
  SELECT en.label, e.weight
116
  FROM edge e
117
  JOIN node en ON e.end_id = en.id
118
- WHERE e.start_id LIKE ? AND e.rel_id = ?
119
  ORDER BY e.weight DESC
120
  LIMIT 7
121
- """, (like_path, rel_url))
122
-
123
- results = cursor.fetchall()
124
- elapsed = time.time() - start
125
-
126
- log_progress(f" {rel_name} outgoing: {len(results)} in {elapsed:.3f}s", "DEBUG")
127
 
128
- for label, weight in results:
129
- output_md += f"- **{word}** {rel_name} β†’ *{label}* `[{weight:.3f}]`\n"
130
  found = True
131
  total += 1
132
 
133
  # Incoming
134
- start = time.time()
135
- cursor.execute("""
136
  SELECT s.label, e.weight
137
  FROM edge e
138
  JOIN node s ON e.start_id = s.id
139
- WHERE e.end_id LIKE ? AND e.rel_id = ?
140
  ORDER BY e.weight DESC
141
  LIMIT 7
142
- """, (like_path, rel_url))
143
-
144
- results = cursor.fetchall()
145
- elapsed = time.time() - start
146
-
147
- log_progress(f" {rel_name} incoming: {len(results)} in {elapsed:.3f}s", "DEBUG")
148
-
149
- for label, weight in results:
150
- output_md += f"- *{label}* {rel_name} β†’ **{word}** `[{weight:.3f}]`\n"
151
  found = True
152
  total += 1
153
 
154
  if not found:
155
- output_md += "*No results*\n"
156
 
157
- output_md += "\n"
158
-
159
- progress(1.0, desc="Complete!")
160
-
 
 
161
  output_md += f"---\n**Total relations:** {total}\n"
162
-
163
  log_progress(f"Profile complete: {total} relations", "SUCCESS")
164
-
165
- return output_md
166
 
167
  except Exception as e:
168
  log_progress(f"Error: {e}", "ERROR")
169
  import traceback
170
  traceback.print_exc()
171
- return f"**❌ Error:** {e}"
172
 
173
  def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
174
- """Query builder - FIXED with full URLs"""
175
  log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO")
176
  progress(0, desc="Building...")
177
 
 
 
 
178
  query = """
179
  SELECT
180
  e.id, s.id, r.label, en.id, e.weight, s.label, en.label
@@ -196,29 +285,25 @@ def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
196
  if start_node.startswith('http://'):
197
  pattern = f"{start_node}%"
198
  else:
199
- pattern = f"{CONCEPTNET_BASE}/c/en/{start_node}%"
200
  query += " AND s.id LIKE ?"
201
  params.append(pattern)
202
 
203
- # Relation - FIXED: use full URL!
204
  if relation and relation.strip():
205
- if relation.startswith('http://'):
206
- rel_value = relation
207
- elif relation.startswith('/r/'):
208
- rel_value = f"{CONCEPTNET_BASE}{relation}"
209
- else:
210
- rel_value = f"{CONCEPTNET_BASE}/r/{relation}"
211
-
212
- query += " AND r.id = ?"
213
- params.append(rel_value)
214
- log_progress(f"Relation: {rel_value}", "DEBUG")
215
 
216
  # End node
217
  if end_node and end_node.strip():
218
  if end_node.startswith('http://'):
219
  pattern = f"{end_node}%"
220
  else:
221
- pattern = f"{CONCEPTNET_BASE}/c/en/{end_node}%"
222
  query += " AND en.id LIKE ?"
223
  params.append(pattern)
224
 
@@ -248,8 +333,13 @@ def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
248
  return pd.DataFrame(), f"❌ {e}"
249
 
250
  def run_raw_query(sql_query):
 
251
  if not sql_query.strip().upper().startswith("SELECT"):
252
- return pd.DataFrame(), "❌ Only SELECT"
 
 
 
 
253
  try:
254
  with get_db_connection() as conn:
255
  start = time.time()
@@ -260,109 +350,156 @@ def run_raw_query(sql_query):
260
  return pd.DataFrame(), f"❌ {e}"
261
 
262
  def get_schema_info():
 
 
 
 
263
  md = f"# πŸ“š Schema\n\n"
264
  md += f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n\n"
265
- md += "## Key Discovery\n\n"
266
- md += "βœ… **Relations use FULL URLs:** `http://conceptnet.io/r/IsA` (not `/r/IsA`)\n\n"
267
- md += "βœ… **Nodes use FULL URLs:** `http://conceptnet.io/c/en/dog` (not `/c/en/dog`)\n\n"
268
 
269
  try:
270
  with get_db_connection() as conn:
271
  cursor = conn.cursor()
272
 
273
- md += "## Common Relations\n\n"
274
- cursor.execute("SELECT id, label FROM relation ORDER BY label LIMIT 20")
275
- for rel_id, label in cursor.fetchall():
276
- md += f"- **{label}:** `{rel_id}`\n"
277
-
278
- md += "\n## Tables\n\n"
279
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
280
- for table, in cursor.fetchall():
 
281
  cursor.execute(f"SELECT COUNT(*) FROM {table}")
282
  md += f"- **{table}:** {cursor.fetchone()[0]:,} rows\n"
283
-
 
 
 
 
 
 
 
 
 
 
 
284
  except Exception as e:
285
- md += f"\nError: {e}\n"
286
 
287
  return md
288
 
289
- # UI
290
  with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo:
291
  gr.Markdown("# 🧠 ConceptNet Explorer")
292
- gr.Markdown(f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} | βœ… **FIXED:** Using full URLs for relations!")
293
 
294
- with gr.Tabs():
295
- with gr.TabItem("πŸ” Semantic Profile"):
296
- gr.Markdown("**Explore semantic relations for any word**")
297
-
298
- with gr.Row():
299
- word_input = gr.Textbox(
300
- label="Word",
301
- placeholder="dog",
302
- value="dog",
303
- info="Enter a word"
304
- )
305
- lang_input = gr.Dropdown(
306
- choices=TARGET_LANGUAGES,
307
- value="en",
308
- label="Language"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  )
310
 
311
- semantic_btn = gr.Button("πŸ” Get Semantic Profile", variant="primary", size="lg")
312
- semantic_output = gr.Markdown()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
 
314
- gr.Examples(
315
- examples=[
316
- ["dog", "en"],
317
- ["hund", "de"],
318
- ["perro", "es"],
319
- ["chien", "fr"],
320
- ],
321
- inputs=[word_input, lang_input]
322
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
- with gr.TabItem("⚑ Query Builder"):
325
- gr.Markdown("**Build custom relationship queries**")
326
-
327
- with gr.Row():
328
- start_input = gr.Textbox(label="Start Node", placeholder="dog")
329
- rel_input = gr.Textbox(label="Relation", placeholder="IsA", value="IsA")
330
- end_input = gr.Textbox(label="End Node", placeholder="")
331
-
332
- limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=50)
333
- query_btn = gr.Button("▢️ Run Query", variant="primary", size="lg")
334
-
335
- status_output = gr.Markdown()
336
- results_output = gr.DataFrame(wrap=True)
337
 
338
- with gr.TabItem("πŸ’» Raw SQL"):
339
- gr.Markdown("**Execute custom SQL queries**")
340
-
341
- raw_sql_input = gr.Textbox(
342
- label="SQL Query",
343
- value=f"SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id WHERE e.start_id = '{CONCEPTNET_BASE}/c/en/dog' LIMIT 10",
344
- lines=3
345
- )
346
-
347
- raw_btn = gr.Button("▢️ Execute")
348
- raw_status = gr.Markdown()
349
- raw_results = gr.DataFrame()
350
 
351
- with gr.TabItem("πŸ“Š Schema"):
352
- schema_btn = gr.Button("πŸ“Š Load Schema")
353
- schema_output = gr.Markdown()
354
-
355
- gr.Markdown(
356
- "---\n"
357
- "**Fixed:** Relations now use full URLs (`http://conceptnet.io/r/IsA`) | "
358
- "**Performance:** Exact match on rel_id = fast queries!"
359
- )
360
-
361
- semantic_btn.click(get_semantic_profile, [word_input, lang_input], semantic_output)
362
- query_btn.click(run_query, [start_input, rel_input, end_input, limit_slider], [results_output, status_output])
363
- raw_btn.click(run_raw_query, raw_sql_input, [raw_results, raw_status])
364
- schema_btn.click(get_schema_info, None, schema_output)
365
 
366
  if __name__ == "__main__":
367
- log_progress("APP READY - Relations use full URLs now!", "SUCCESS")
 
 
 
368
  demo.launch(ssr_mode=False)
 
5
  import os
6
  import time
7
  import json
8
+ from typing import Dict, List, Optional
9
 
10
  # ===== CONFIGURATION =====
11
  TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
 
15
  CONCEPTNET_BASE = "http://conceptnet.io"
16
  # =========================
17
 
18
+ # --- All relations MUST be full URLs ---
19
+ CONCEPTNET_RELATIONS: Dict[str, str] = {
20
+ "RelatedTo": f"{CONCEPTNET_BASE}/r/RelatedTo",
21
+ "IsA": f"{CONCEPTNET_BASE}/r/IsA",
22
+ "PartOf": f"{CONCEPTNET_BASE}/r/PartOf",
23
+ "HasA": f"{CONCEPTNET_BASE}/r/HasA",
24
+ "UsedFor": f"{CONCEPTNET_BASE}/r/UsedFor",
25
+ "CapableOf": f"{CONCEPTNET_BASE}/r/CapableOf",
26
+ "AtLocation": f"{CONCEPTNET_BASE}/r/AtLocation",
27
+ "Causes": f"{CONCEPTNET_BASE}/r/Causes",
28
+ "HasSubevent": f"{CONCEPTNET_BASE}/r/HasSubevent",
29
+ "HasFirstSubevent": f"{CONCEPTNET_BASE}/r/HasFirstSubevent",
30
+ "HasLastSubevent": f"{CONCEPTNET_BASE}/r/HasLastSubevent",
31
+ "HasPrerequisite": f"{CONCEPTNET_BASE}/r/HasPrerequisite",
32
+ "HasProperty": f"{CONCEPTNET_BASE}/r/HasProperty",
33
+ "MotivatedByGoal": f"{CONCEPTNET_BASE}/r/MotivatedByGoal",
34
+ "ObstructedBy": f"{CONCEPTNET_BASE}/r/ObstructedBy",
35
+ "Desires": f"{CONCEPTNET_BASE}/r/Desires",
36
+ "CreatedBy": f"{CONCEPTNET_BASE}/r/CreatedBy",
37
+ "Synonym": f"{CONCEPTNET_BASE}/r/Synonym",
38
+ "Antonym": f"{CONCEPTNET_BASE}/r/Antonym",
39
+ "DistinctFrom": f"{CONCEPTNET_BASE}/r/DistinctFrom",
40
+ "DerivedFrom": f"{CONCEPTNET_BASE}/r/DerivedFrom",
41
+ "SymbolOf": f"{CONCEPTNET_BASE}/r/SymbolOf",
42
+ "DefinedAs": f"{CONCEPTNET_BASE}/r/DefinedAs",
43
+ "MannerOf": f"{CONCEPTNET_BASE}/r/MannerOf",
44
+ "LocatedNear": f"{CONCEPTNET_BASE}/r/LocatedNear",
45
+ "HasContext": f"{CONCEPTNET_BASE}/r/HasContext",
46
+ "SimilarTo": f"{CONCEPTNET_BASE}/r/SimilarTo",
47
+ "EtymologicallyRelatedTo": f"{CONCEPTNET_BASE}/r/EtymologicallyRelatedTo",
48
+ "EtymologicallyDerivedFrom": f"{CONCEPTNET_BASE}/r/EtymologicallyDerivedFrom",
49
+ "CausesDesire": f"{CONCEPTNET_BASE}/r/CausesDesire",
50
+ "MadeOf": f"{CONCEPTNET_BASE}/r/MadeOf",
51
+ "ReceivesAction": f"{CONCEPTNET_BASE}/r/ReceivesAction",
52
+ "ExternalURL": f"{CONCEPTNET_BASE}/r/ExternalURL",
53
+ "NotDesires": f"{CONCEPTNET_BASE}/r/NotDesires",
54
+ "NotUsedFor": f"{CONCEPTNET_BASE}/r/NotUsedFor",
55
+ "NotCapableOf": f"{CONCEPTNET_BASE}/r/NotCapableOf",
56
+ "NotHasProperty": f"{CONCEPTNET_BASE}/r/NotHasProperty",
57
+ }
58
+ # =========================
59
+
60
  print(f"🌍 Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
61
+ print(f"πŸ“š Relations: {len(CONCEPTNET_RELATIONS)} relations loaded")
62
 
63
  HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
64
 
65
  def log_progress(message, level="INFO"):
66
+ """Simple logger with timestamp and emoji prefix."""
67
  timestamp = time.strftime("%H:%M:%S")
68
  prefix = {"INFO": "ℹ️ ", "SUCCESS": "βœ…", "ERROR": "❌", "WARN": "⚠️ ", "DEBUG": "πŸ”"}.get(level, "")
69
  print(f"[{timestamp}] {prefix} {message}")
70
 
71
  def check_remote_progress():
72
+ """Check Hugging Face Hub for indexing progress."""
73
  if not HF_TOKEN:
74
  return {"indexing_complete": False}
75
  try:
 
82
  return {"indexing_complete": False}
83
 
84
  def create_indexed_database():
85
+ """Download the pre-indexed database from HF Hub if indexing is complete."""
86
  progress = check_remote_progress()
87
  if progress.get("indexing_complete"):
88
+ log_progress("Remote indexing is complete. Downloading DB...", "INFO")
89
  try:
90
  return hf_hub_download(repo_id=INDEXED_REPO_ID, filename=INDEXED_DB_FILENAME, repo_type="dataset", token=HF_TOKEN)
91
+ except Exception as e:
92
+ log_progress(f"Failed to download indexed DB: {e}", "ERROR")
93
  pass
94
+ log_progress("Remote indexing not complete or DB not found.", "WARN")
95
  return None
96
 
97
  DB_PATH = create_indexed_database()
98
 
99
+ if not DB_PATH:
100
+ log_progress("DATABASE NOT FOUND. App will not function.", "ERROR")
101
+ else:
102
+ log_progress(f"Database loaded from: {DB_PATH}", "SUCCESS")
103
+
104
  def get_db_connection():
105
+ """Get a thread-safe connection to the SQLite database."""
106
+ if not DB_PATH:
107
+ raise Exception("Database path is not set. Cannot create connection.")
108
  conn = sqlite3.connect(DB_PATH, check_same_thread=False)
109
+ conn.execute("PRAGMA cache_size = -256000") # 256MB cache
110
+ conn.execute("PRAGMA temp_store = MEMORY")
111
  return conn
112
 
113
+ def get_semantic_profile(word: str, lang: str = 'en', selected_relations: List[str] = None, progress=gr.Progress()):
114
+ """
115
+ Get semantic profile for a word.
116
+ FIX: This is now a generator to stream updates and show progress.
117
+ FIX: Uses exact node match first for performance.
118
+ """
119
  log_progress(f"Profile: {word} ({lang})", "INFO")
 
120
 
121
  if not word or lang not in TARGET_LANGUAGES:
122
+ yield "⚠️ Invalid input"
123
+ return
124
+
125
+ if not DB_PATH:
126
+ yield "❌ **Error:** Database file not found."
127
+ return
128
+
129
+ # Set default relations if none are selected
130
+ if not selected_relations:
131
+ selected_relations = [
132
+ "IsA", "RelatedTo", "PartOf", "HasA", "UsedFor",
133
+ "CapableOf", "Synonym", "Antonym"
134
+ ]
135
+
136
  word = word.strip().lower().replace(' ', '_')
137
+ exact_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  output_md = f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n"
140
 
 
142
  with get_db_connection() as conn:
143
  cursor = conn.cursor()
144
 
145
+ progress(0, desc="Starting...")
146
+ yield output_md
 
 
 
 
 
 
147
 
148
+ progress(0.05, desc="Finding nodes...")
149
 
150
+ # --- PERFORMANCE FIX: Try exact match first ---
151
+ cursor.execute("SELECT id, label FROM node WHERE id = ?", (exact_path,))
152
+ exact_node = cursor.fetchone()
153
+
154
+ query_path = None
155
+ query_operator = ""
156
+ nodes = []
157
+
158
+ if exact_node:
159
+ log_progress(f"Found exact node: {exact_node[0]}", "SUCCESS")
160
+ nodes = [exact_node]
161
+ query_path = exact_path
162
+ query_operator = "=" # Use fast exact match
163
+ else:
164
+ log_progress(f"No exact node, falling back to LIKE...", "WARN")
165
+ like_path = f"{exact_path}%"
166
+ cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
167
+ nodes = cursor.fetchall()
168
+ if not nodes:
169
+ yield f"# 🧠 '{word}'\n\n⚠️ Not found"
170
+ return
171
+ query_path = like_path # Use slower LIKE match
172
+ query_operator = "LIKE"
173
+
174
+ log_progress(f"Using path: {query_path} (op: {query_operator})", "INFO")
175
+
176
  for node_id, label in nodes[:3]:
177
  output_md += f"**Node:** `{node_id}` β†’ **{label}**\n"
178
  output_md += "\n"
179
 
180
+ yield output_md # Yield after finding nodes
181
+
182
  total = 0
183
 
184
+ # Filter relations to query
185
+ relations_to_query = [(name, CONCEPTNET_RELATIONS[name]) for name in selected_relations if name in CONCEPTNET_RELATIONS]
186
+ num_relations = len(relations_to_query)
187
+
188
+ if num_relations == 0:
189
+ output_md += "⚠️ No relations selected."
190
+ yield output_md
191
+ return
192
+
193
+ for i, (rel_name, rel_url) in enumerate(relations_to_query):
194
+ progress((i + 0.1) / num_relations, desc=f"πŸ”Ž {rel_name}...")
195
 
196
+ rel_md = f"## {rel_name}\n\n"
197
  found = False
198
 
199
+ # Outgoing
200
+ start_time = time.time()
201
+ sql_out = f"""
202
  SELECT en.label, e.weight
203
  FROM edge e
204
  JOIN node en ON e.end_id = en.id
205
+ WHERE e.start_id {query_operator} ? AND e.rel_id = ?
206
  ORDER BY e.weight DESC
207
  LIMIT 7
208
+ """
209
+ cursor.execute(sql_out, (query_path, rel_url))
210
+ results_out = cursor.fetchall()
211
+ elapsed = time.time() - start_time
212
+ log_progress(f" {rel_name} outgoing: {len(results_out)} in {elapsed:.3f}s", "DEBUG")
 
213
 
214
+ for label, weight in results_out:
215
+ rel_md += f"- **{word}** {rel_name} β†’ *{label}* `[{weight:.3f}]`\n"
216
  found = True
217
  total += 1
218
 
219
  # Incoming
220
+ start_time = time.time()
221
+ sql_in = f"""
222
  SELECT s.label, e.weight
223
  FROM edge e
224
  JOIN node s ON e.start_id = s.id
225
+ WHERE e.end_id {query_operator} ? AND e.rel_id = ?
226
  ORDER BY e.weight DESC
227
  LIMIT 7
228
+ """
229
+ cursor.execute(sql_in, (query_path, rel_url))
230
+ results_in = cursor.fetchall()
231
+ elapsed = time.time() - start_time
232
+ log_progress(f" {rel_name} incoming: {len(results_in)} in {elapsed:.3f}s", "DEBUG")
233
+
234
+ for label, weight in results_in:
235
+ rel_md += f"- *{label}* {rel_name} β†’ **{word}** `[{weight:.3f}]`\n"
 
236
  found = True
237
  total += 1
238
 
239
  if not found:
240
+ rel_md += "*No results*\n"
241
 
242
+ rel_md += "\n"
243
+
244
+ output_md += rel_md # Add this relation's results to the full markdown
245
+ progress((i + 1) / num_relations, desc=f"βœ“ {rel_name}")
246
+ yield output_md # --- PROGRESS FIX: Yield intermediate results ---
247
+
248
  output_md += f"---\n**Total relations:** {total}\n"
 
249
  log_progress(f"Profile complete: {total} relations", "SUCCESS")
250
+ progress(1.0, desc="βœ… Complete!")
251
+ yield output_md # Yield final result
252
 
253
  except Exception as e:
254
  log_progress(f"Error: {e}", "ERROR")
255
  import traceback
256
  traceback.print_exc()
257
+ yield f"**❌ Error:** {e}"
258
 
259
  def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
260
+ """Query builder - FIXED to use relation names from dropdown."""
261
  log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO")
262
  progress(0, desc="Building...")
263
 
264
+ if not DB_PATH:
265
+ return pd.DataFrame(), "❌ **Error:** Database file not found."
266
+
267
  query = """
268
  SELECT
269
  e.id, s.id, r.label, en.id, e.weight, s.label, en.label
 
285
  if start_node.startswith('http://'):
286
  pattern = f"{start_node}%"
287
  else:
288
+ pattern = f"{CONCEPTNET_BASE}/c/en/{start_node.strip().lower().replace(' ', '_')}%"
289
  query += " AND s.id LIKE ?"
290
  params.append(pattern)
291
 
292
+ # Relation - FIX: Use dictionary
293
  if relation and relation.strip():
294
+ rel_value = CONCEPTNET_RELATIONS.get(relation.strip())
295
+ if rel_value:
296
+ query += " AND r.id = ?"
297
+ params.append(rel_value)
298
+ log_progress(f"Relation: {rel_value}", "DEBUG")
299
+ # If relation is blank or invalid, filter is just not added
 
 
 
 
300
 
301
  # End node
302
  if end_node and end_node.strip():
303
  if end_node.startswith('http://'):
304
  pattern = f"{end_node}%"
305
  else:
306
+ pattern = f"{CONCEPTNET_BASE}/c/en/{end_node.strip().lower().replace(' ', '_')}%"
307
  query += " AND en.id LIKE ?"
308
  params.append(pattern)
309
 
 
333
  return pd.DataFrame(), f"❌ {e}"
334
 
335
  def run_raw_query(sql_query):
336
+ """Execute a raw SELECT SQL query."""
337
  if not sql_query.strip().upper().startswith("SELECT"):
338
+ return pd.DataFrame(), "❌ Only SELECT queries are allowed."
339
+
340
+ if not DB_PATH:
341
+ return pd.DataFrame(), "❌ **Error:** Database file not found."
342
+
343
  try:
344
  with get_db_connection() as conn:
345
  start = time.time()
 
350
  return pd.DataFrame(), f"❌ {e}"
351
 
352
  def get_schema_info():
353
+ """Get schema information, including tables, row counts, and indices."""
354
+ if not DB_PATH:
355
+ return "❌ **Error:** Database file not found."
356
+
357
  md = f"# πŸ“š Schema\n\n"
358
  md += f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n\n"
359
+ md += "**Key:** Relations and Nodes use full `http://conceptnet.io/...` URLs.\n\n"
 
 
360
 
361
  try:
362
  with get_db_connection() as conn:
363
  cursor = conn.cursor()
364
 
365
+ md += "## Tables & Row Counts\n\n"
 
 
 
 
 
366
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
367
+ tables = cursor.fetchall()
368
+ for (table,) in tables:
369
  cursor.execute(f"SELECT COUNT(*) FROM {table}")
370
  md += f"- **{table}:** {cursor.fetchone()[0]:,} rows\n"
371
+
372
+ md += "\n## Indices\n\n"
373
+ cursor.execute("SELECT name, sql FROM sqlite_master WHERE type='index'")
374
+ for name, sql in cursor.fetchall():
375
+ if sql: # Filter out auto-indices
376
+ md += f"- **{name}:** `{sql}`\n"
377
+
378
+ md += "\n## Common Relations\n\n"
379
+ cursor.execute("SELECT id, label FROM relation ORDER BY label LIMIT 20")
380
+ for rel_id, label in cursor.fetchall():
381
+ md += f"- **{label}:** `{rel_id}`\n"
382
+
383
  except Exception as e:
384
+ md += f"\n**❌ Error:** {e}\n"
385
 
386
  return md
387
 
388
+ # ===== Build Gradio UI =====
389
  with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo:
390
  gr.Markdown("# 🧠 ConceptNet Explorer")
391
+ gr.Markdown(f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} | **Relations:** {len(CONCEPTNET_RELATIONS)} types")
392
 
393
+ if not DB_PATH:
394
+ gr.Markdown("## ❌ ERROR: DATABASE FILE NOT FOUND")
395
+ gr.Markdown("This app cannot start because the SQLite database file could not be downloaded from Hugging Face Hub. Please check the logs.")
396
+
397
+ else:
398
+ with gr.Tabs():
399
+ with gr.TabItem("πŸ” Semantic Profile"):
400
+ gr.Markdown("**Explore semantic relations for any word. Progress bar and output will update live.**")
401
+
402
+ with gr.Row():
403
+ word_input = gr.Textbox(
404
+ label="Word",
405
+ placeholder="e.g., dog, hund, perro",
406
+ value="dog",
407
+ scale=3
408
+ )
409
+ lang_input = gr.Dropdown(
410
+ choices=TARGET_LANGUAGES,
411
+ value="en",
412
+ label="Language",
413
+ scale=1
414
+ )
415
+
416
+ with gr.Accordion("Select Relations (fewer = faster)", open=False):
417
+ relation_input = gr.CheckboxGroup(
418
+ choices=list(CONCEPTNET_RELATIONS.keys()),
419
+ label="Relations to Query",
420
+ value=["IsA", "RelatedTo", "PartOf", "HasA", "UsedFor", "CapableOf", "Synonym", "Antonym", "AtLocation", "HasProperty"]
421
+ )
422
+
423
+ semantic_btn = gr.Button("πŸ” Get Semantic Profile", variant="primary", size="lg")
424
+ semantic_output = gr.Markdown(value="Click the button to get the semantic profile.")
425
+
426
+ gr.Examples(
427
+ examples=[
428
+ ["dog", "en"],
429
+ ["hund", "de"],
430
+ ["perro", "es"],
431
+ ["chat", "fr"],
432
+ ["knowledge", "en"],
433
+ ],
434
+ inputs=[word_input, lang_input],
435
+ label="Examples"
436
  )
437
 
438
+ with gr.TabItem("⚑ Query Builder"):
439
+ gr.Markdown("**Build custom relationship queries using dropdowns.**")
440
+
441
+ with gr.Row():
442
+ start_input = gr.Textbox(label="Start Node", placeholder="dog (optional)")
443
+ rel_input = gr.Dropdown(
444
+ choices=[""] + list(CONCEPTNET_RELATIONS.keys()), # Add "" for 'any'
445
+ label="Relation",
446
+ value="IsA",
447
+ info="Leave blank to query all relations"
448
+ )
449
+ end_input = gr.Textbox(label="End Node", placeholder="(optional)")
450
+
451
+ limit_slider = gr.Slider(label="Limit", minimum=1, maximum=500, value=50, step=1)
452
+ query_btn = gr.Button("▢️ Run Query", variant="primary", size="lg")
453
+
454
+ status_output = gr.Markdown()
455
+ results_output = gr.DataFrame(wrap=True, height=400)
456
 
457
+ with gr.TabItem("πŸ’» Raw SQL"):
458
+ gr.Markdown("**Execute custom `SELECT` SQL queries against the database.**")
459
+
460
+ raw_sql_input = gr.Textbox(
461
+ label="SQL Query",
462
+ value=f"SELECT s.label, r.label, e.label, e.weight\nFROM edge e\nJOIN node s ON e.start_id = s.id\nJOIN node e ON e.end_id = e.id\nJOIN relation r ON e.rel_id = r.id\nWHERE s.id = '{CONCEPTNET_BASE}/c/en/dog'\nAND r.id = '{CONCEPTNET_BASE}/r/IsA'\nORDER BY e.weight DESC\nLIMIT 10",
463
+ lines=5,
464
+ font_family="monospace"
465
+ )
466
+
467
+ raw_btn = gr.Button("▢️ Execute")
468
+ raw_status = gr.Markdown()
469
+ raw_results = gr.DataFrame(height=400)
470
+
471
+ with gr.TabItem("πŸ“Š Schema"):
472
+ gr.Markdown("**View database schema, tables, and indices.**")
473
+ schema_btn = gr.Button("πŸ“Š Load Schema Info")
474
+ schema_output = gr.Markdown()
475
+
476
+ # --- Button Click Handlers ---
477
+ semantic_btn.click(
478
+ get_semantic_profile,
479
+ inputs=[word_input, lang_input, relation_input],
480
+ outputs=semantic_output
481
+ )
482
 
483
+ query_btn.click(
484
+ run_query,
485
+ inputs=[start_input, rel_input, end_input, limit_slider],
486
+ outputs=[results_output, status_output]
487
+ )
 
 
 
 
 
 
 
 
488
 
489
+ raw_btn.click(
490
+ run_raw_query,
491
+ inputs=raw_sql_input,
492
+ outputs=[raw_results, raw_status]
493
+ )
 
 
 
 
 
 
 
494
 
495
+ # Load schema info automatically or on button click
496
+ # Use .then() to load it after the UI is built
497
+ demo.load(get_schema_info, None, schema_output)
498
+ schema_btn.click(get_schema_info, None, schema_output)
 
 
 
 
 
 
 
 
 
 
499
 
500
  if __name__ == "__main__":
501
+ if DB_PATH:
502
+ log_progress("APP READY!", "SUCCESS")
503
+ else:
504
+ log_progress("APP LAUNCHING WITH ERRORS (DB NOT FOUND)", "ERROR")
505
  demo.launch(ssr_mode=False)