alfakat committed on
Commit
cc98bea
·
verified ·
1 Parent(s): 8d465b0

cleaned up the GUI

Browse files
Files changed (1) hide show
  1. app.py +80 -58
app.py CHANGED
@@ -7,12 +7,16 @@ os.environ["TRANSFORMERS_NO_FLAX"] = "1"
7
  os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
8
 
9
  CRUD_VERB = {
10
- "INSERT": "inserted",
11
- "UPDATE": "updated",
12
  "DELETE": "deleted",
13
- "SELECT": "retrieved",
14
  }
15
 
 
 
 
 
16
  def detect_command(sql_text: str) -> str:
17
  m = re.search(r"\b(INSERT|UPDATE|DELETE|SELECT)\b", sql_text, flags=re.IGNORECASE)
18
  return m.group(1).upper() if m else "OTHER"
@@ -22,7 +26,7 @@ def parse_table_name(sql: str) -> str:
22
  r"INSERT\s+INTO\s+([A-Za-z0-9\.\[\]_]+)",
23
  r"UPDATE\s+([A-Za-z0-9\.\[\]_]+)",
24
  r"DELETE\s+FROM\s+([A-Za-z0-9\.\[\]_]+)",
25
- r"FROM\s+([A-Za-z0-9\.\[\]_]+)",
26
  ]:
27
  m = re.search(pat, sql, flags=re.IGNORECASE)
28
  if m:
@@ -35,6 +39,16 @@ def clean_statement(text: str) -> str:
35
  text += "."
36
  return text[0].upper() + text[1:] if text else text
37
 
 
 
 
 
 
 
 
 
 
 
38
  def summarize_insert(sql: str) -> str:
39
  table = parse_table_name(sql)
40
  cols_match = re.search(r"\(\s*([^)]+?)\s*\)\s*VALUES", sql, flags=re.IGNORECASE | re.DOTALL)
@@ -61,18 +75,20 @@ def summarize_insert(sql: str) -> str:
61
 
62
  count = len(tuples)
63
  verb = CRUD_VERB["INSERT"]
 
64
  if user_ids:
65
  groups = {}
66
  for u in user_ids:
67
  groups[u] = groups.get(u, 0) + 1
68
  if len(groups) == 1:
69
  uid = next(iter(groups))
70
- return f"{count} record(s) {verb} into {table} (user_id {uid})."
71
  else:
72
- parts = [f"{n} with user_id {uid}" for uid, n in sorted(groups.items())]
73
- return f"{count} record(s) {verb} into {table} ({', '.join(parts)})."
74
  else:
75
- return f"{count} record(s) {verb} into {table}."
 
76
 
77
  def summarize_update(sql: str) -> str:
78
  table = parse_table_name(sql)
@@ -84,32 +100,36 @@ def summarize_update(sql: str) -> str:
84
  col = a.split("=")[0].strip().strip("[]")
85
  if col:
86
  changed_cols.append(col)
 
87
  where = ""
88
  w = re.search(r"\bWHERE\b\s+(.+?)(;|$)", sql, flags=re.IGNORECASE | re.DOTALL)
89
  if w:
90
  where = re.sub(r"\s+", " ", w.group(1)).strip()
91
 
92
  verb = CRUD_VERB["UPDATE"]
93
- base = f"Record(s) {verb} in {table}"
94
  if changed_cols:
95
- if len(changed_cols) <= 3:
96
- base += f" (changed: {', '.join(changed_cols)})"
97
- else:
98
- base += f" ({len(changed_cols)} columns changed)"
99
  if where:
100
- base += f" where {where}"
101
  return base + "."
102
 
 
103
  def summarize_delete(sql: str) -> str:
104
  table = parse_table_name(sql)
105
  where = ""
106
  w = re.search(r"\bWHERE\b\s+(.+?)(;|$)", sql, flags=re.IGNORECASE | re.DOTALL)
107
  if w:
108
  where = re.sub(r"\s+", " ", w.group(1)).strip()
 
 
109
  verb = CRUD_VERB["DELETE"]
110
- base = f"Record(s) {verb} from {table}"
 
 
111
  if where:
112
- base += f" where {where}"
113
  return base + "."
114
 
115
  def summarize_select(sql: str) -> str:
@@ -126,7 +146,7 @@ def summarize_select(sql: str) -> str:
126
  verb = CRUD_VERB["SELECT"]
127
  base = f"{cols} will be {verb} from {table}"
128
  if where:
129
- base += f" where {where}"
130
  return base + "."
131
 
132
  def deterministic_summary(sql_text: str) -> str:
@@ -141,6 +161,7 @@ def deterministic_summary(sql_text: str) -> str:
141
  return summarize_select(sql_text)
142
  return "Unrecognized SQL command."
143
 
 
144
  _HAS_T5 = False
145
  try:
146
  from transformers import T5Tokenizer, T5ForConditionalGeneration
@@ -156,7 +177,7 @@ CRUD_PROMPT = {
156
  "UPDATE": "Rewrite as a clear statement that existing records will be updated. Keep names and conditions.",
157
  "DELETE": "Rewrite as a clear statement that records will be deleted. Keep conditions if present.",
158
  "SELECT": "Rewrite as a clear statement that data will be retrieved. Keep table/filters.",
159
- "OTHER": "Rewrite as a short, clear statement for non-technical users.",
160
  }
161
 
162
  def load_t5():
@@ -170,8 +191,7 @@ def rephrase_with_t5(summary: str, cmd: str) -> str:
170
  return summary
171
  load_t5()
172
  instruction = CRUD_PROMPT.get(cmd, CRUD_PROMPT["OTHER"])
173
- # Force a statement; avoid question opening
174
- input_text = f"explain sql in plain english statement: {instruction} {summary} </s>"
175
  feats = _T5_TOKENIZER([input_text], return_tensors="pt")
176
  out = _T5_MODEL.generate(
177
  input_ids=feats["input_ids"],
@@ -182,11 +202,42 @@ def rephrase_with_t5(summary: str, cmd: str) -> str:
182
  decoded = _T5_TOKENIZER.decode(out[0], skip_special_tokens=True)
183
  return clean_statement(decoded)
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  EXAMPLES = [
187
  # INSERT
188
  """INSERT INTO demo_database..user_records (record_id, person_id, created_at)
189
- VALUES (101, 5, GETDATE()), (102, 5, GETDATE()), (103, 5, GETDATE());""",
190
  # UPDATE
191
  """UPDATE users
192
  SET status = 'active', last_login = GETDATE()
@@ -200,44 +251,15 @@ FROM accounts
200
  WHERE email LIKE '%@example.com' AND created_at >= '2025-01-01';""",
201
  ]
202
 
203
- with gr.Blocks(title="SQL → Human: CRUD Explainer") as demo:
204
- gr.HTML("<h1 style='text-align:center;'>SQL → Human: CRUD Explainer</h1>")
205
- gr.Markdown(
206
- "Paste any SQL (INSERT/UPDATE/DELETE/SELECT). "
207
- "The app will detect the command, create a deterministic summary, "
208
- "and optionally rephrase it using a SQL→English model."
209
- )
210
-
211
- with gr.Row():
212
- sql_in = gr.Textbox(label="SQL input", lines=14, placeholder="Paste your SQL here...")
213
- with gr.Row():
214
- use_t5 = gr.Checkbox(label="Use T5 rephrase (mrm8488/t5-base-finetuned-wikiSQL-sql-to-en)", value=True)
215
 
216
- with gr.Row():
217
- btn = gr.Button("Explain SQL", variant="primary")
218
 
219
- detected_out = gr.Label(label="Detected Command")
220
- deterministic_out = gr.Markdown(label="Deterministic Summary")
221
- final_out = gr.Markdown(label="Final Explanation")
222
 
223
- def explain(sql_text: str, want_t5: bool):
224
- sql_text = (sql_text or "").strip()
225
- cmd = detect_command(sql_text)
226
- deterministic = deterministic_summary(sql_text)
227
- if want_t5 and _HAS_T5 and cmd != "OTHER":
228
- final = rephrase_with_t5(deterministic, cmd)
229
- else:
230
- final = clean_statement(deterministic)
231
- return {"Detected Command": cmd}, deterministic, final
232
-
233
- btn.click(explain, inputs=[sql_in, use_t5], outputs=[detected_out, deterministic_out, final_out])
234
-
235
- gr.Examples(
236
- examples=EXAMPLES,
237
- inputs=[sql_in],
238
- label="Try examples",
239
- examples_per_page=4
240
- )
241
 
242
- if __name__ == "__main__":
243
- demo.launch()
 
7
  os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
8
 
9
  CRUD_VERB = {
10
+ "INSERT": "load",
11
+ "UPDATE": "changed",
12
  "DELETE": "deleted",
13
+ "SELECT": "specified",
14
  }
15
 
16
+ # ---------- helpers ----------
17
def plural(n: int, singular: str = "record", plural_word: str = "records") -> str:
    """Return ``"<n> <noun>"`` with the noun chosen by count.

    Args:
        n: The quantity to display.
        singular: Noun used when ``n`` is exactly 1.
        plural_word: Noun used for every other value of ``n`` (including 0).

    Returns:
        The count followed by a single space and the selected noun.
    """
    noun = plural_word if n != 1 else singular
    return f"{n} {noun}"
19
+
20
  def detect_command(sql_text: str) -> str:
21
  m = re.search(r"\b(INSERT|UPDATE|DELETE|SELECT)\b", sql_text, flags=re.IGNORECASE)
22
  return m.group(1).upper() if m else "OTHER"
 
26
  r"INSERT\s+INTO\s+([A-Za-z0-9\.\[\]_]+)",
27
  r"UPDATE\s+([A-Za-z0-9\.\[\]_]+)",
28
  r"DELETE\s+FROM\s+([A-Za-z0-9\.\[\]_]+)",
29
+ r"\bFROM\s+([A-Za-z0-9\.\[\]_]+)",
30
  ]:
31
  m = re.search(pat, sql, flags=re.IGNORECASE)
32
  if m:
 
39
  text += "."
40
  return text[0].upper() + text[1:] if text else text
41
 
42
def infer_in_list_count(where_clause: str) -> "int | None":
    """Count the literal items of the first ``IN (...)`` list in a WHERE clause.

    Args:
        where_clause: Text of a WHERE clause (without the ``WHERE`` keyword).
            May be empty.

    Returns:
        The number of non-empty items in the first ``IN`` list, or ``None``
        when the clause is empty, has no ``IN`` list, the list is empty, the
        list is negated (``NOT IN``), or the list is a subquery.
    """
    if not where_clause:
        return None
    m = re.search(
        r"\b(NOT\s+)?IN\s*\(\s*([^)]+?)\s*\)",
        where_clause,
        flags=re.IGNORECASE | re.DOTALL,
    )
    if not m:
        return None
    # "NOT IN (a, b)" excludes the listed values, so the list length says
    # nothing about how many rows are targeted.
    if m.group(1):
        return None
    inner = m.group(2)
    # A subquery yields an unknown number of values; it is not a literal list.
    if re.match(r"SELECT\b", inner, flags=re.IGNORECASE):
        return None
    # Take quoted literals whole so a comma inside 'a,b' does not split the
    # item; everything else is delimited by commas.
    items = re.findall(r"'(?:[^']|'')*'|[^,\s][^,]*", inner)
    return len(items) if items else None
50
+
51
+ # ---------- deterministic CRUD summaries ----------
52
  def summarize_insert(sql: str) -> str:
53
  table = parse_table_name(sql)
54
  cols_match = re.search(r"\(\s*([^)]+?)\s*\)\s*VALUES", sql, flags=re.IGNORECASE | re.DOTALL)
 
75
 
76
  count = len(tuples)
77
  verb = CRUD_VERB["INSERT"]
78
+
79
  if user_ids:
80
  groups = {}
81
  for u in user_ids:
82
  groups[u] = groups.get(u, 0) + 1
83
  if len(groups) == 1:
84
  uid = next(iter(groups))
85
+ return f"{count} record(s) {verb} into table {table} (column user_id {uid})."
86
  else:
87
+ parts = [f"{n} with column user_id {uid}" for uid, n in sorted(groups.items())]
88
+ return f"{count} record(s) {verb} into table {table} ({', '.join(parts)})."
89
  else:
90
+ return f"{count} record(s) {verb} into table {table}."
91
+
92
 
93
  def summarize_update(sql: str) -> str:
94
  table = parse_table_name(sql)
 
100
  col = a.split("=")[0].strip().strip("[]")
101
  if col:
102
  changed_cols.append(col)
103
+
104
  where = ""
105
  w = re.search(r"\bWHERE\b\s+(.+?)(;|$)", sql, flags=re.IGNORECASE | re.DOTALL)
106
  if w:
107
  where = re.sub(r"\s+", " ", w.group(1)).strip()
108
 
109
  verb = CRUD_VERB["UPDATE"]
110
+ base = f"Record(s) {verb} in table {table}"
111
  if changed_cols:
112
+ cols_txt = ", ".join(changed_cols)
113
+ base += f" (changed: column(s) {cols_txt})"
 
 
114
  if where:
115
+ base += f" with {where}"
116
  return base + "."
117
 
118
+
119
  def summarize_delete(sql: str) -> str:
120
  table = parse_table_name(sql)
121
  where = ""
122
  w = re.search(r"\bWHERE\b\s+(.+?)(;|$)", sql, flags=re.IGNORECASE | re.DOTALL)
123
  if w:
124
  where = re.sub(r"\s+", " ", w.group(1)).strip()
125
+
126
+ n_targets = infer_in_list_count(where)
127
  verb = CRUD_VERB["DELETE"]
128
+
129
+ base = (f"{plural(n_targets, 'record', 'records')} {verb} from {table}"
130
+ if n_targets else f"Records {verb} from {table}")
131
  if where:
132
+ base += f" that match {where}"
133
  return base + "."
134
 
135
  def summarize_select(sql: str) -> str:
 
146
  verb = CRUD_VERB["SELECT"]
147
  base = f"{cols} will be {verb} from {table}"
148
  if where:
149
+ base += f" that match {where}"
150
  return base + "."
151
 
152
  def deterministic_summary(sql_text: str) -> str:
 
161
  return summarize_select(sql_text)
162
  return "Unrecognized SQL command."
163
 
164
+ # ---------- optional T5 rephrase ----------
165
  _HAS_T5 = False
166
  try:
167
  from transformers import T5Tokenizer, T5ForConditionalGeneration
 
177
  "UPDATE": "Rewrite as a clear statement that existing records will be updated. Keep names and conditions.",
178
  "DELETE": "Rewrite as a clear statement that records will be deleted. Keep conditions if present.",
179
  "SELECT": "Rewrite as a clear statement that data will be retrieved. Keep table/filters.",
180
+ "OTHER": "Rewrite as a short, clear statement for non-technical users.",
181
  }
182
 
183
  def load_t5():
 
191
  return summary
192
  load_t5()
193
  instruction = CRUD_PROMPT.get(cmd, CRUD_PROMPT["OTHER"])
194
+ input_text = f"explain sql in plain english statement: {instruction} {summary}"
 
195
  feats = _T5_TOKENIZER([input_text], return_tensors="pt")
196
  out = _T5_MODEL.generate(
197
  input_ids=feats["input_ids"],
 
202
  decoded = _T5_TOKENIZER.decode(out[0], skip_special_tokens=True)
203
  return clean_statement(decoded)
204
 
205
+ def _bad_rephrase(text: str) -> bool:
206
+ if text is None:
207
+ return True
208
+ t = str(text).strip()
209
+ if len(t) >= 2 and ((t[0] == t[-1] == '"') or (t[0] == t[-1] == "'")):
210
+ t = t[1:-1].strip()
211
+ t_norm = re.sub(r"[\s\.\!\?]+$", "", t).strip().lower()
212
+ # added "true" here to force fallback for True./False. answers
213
+ if t_norm in ("false", "true", "none", "null", "n/a", "na", ""):
214
+ return True
215
+ if re.match(r"^(what|which|how)\b", t_norm, re.IGNORECASE):
216
+ return True
217
+ return len(t_norm) < 3
218
+
219
def explain(sql_text: str):
    """Turn a SQL statement into a plain-language sentence.

    The deterministic summary is always computed. When T5 is available and
    the command is one of the recognised CRUD verbs, a T5 rephrase is
    attempted; any exception from that attempt is printed and ignored. The
    rephrase is returned only if ``_bad_rephrase`` accepts it, otherwise the
    deterministic summary is returned.

    Args:
        sql_text: SQL text from the UI; ``None`` or empty is treated as "".

    Returns:
        The chosen explanation string.
    """
    query = (sql_text or "").strip()
    command = detect_command(query)
    fallback = deterministic_summary(query)

    if not _HAS_T5 or command == "OTHER":
        candidate = None
    else:
        try:
            candidate = rephrase_with_t5(fallback, command)
        except Exception as exc:
            # Best-effort: a model failure must not break the UI callback.
            print(f"T5 rephrase failed: {exc}")
            candidate = None

    return fallback if _bad_rephrase(candidate) else candidate
235
+
236
+ # ---------- UI ----------
237
  EXAMPLES = [
238
  # INSERT
239
  """INSERT INTO demo_database..user_records (record_id, person_id, created_at)
240
+ VALUES (101, 5, GETDATE()), (102, 5, GETDATE()), (103, 5, GETDATE());""",
241
  # UPDATE
242
  """UPDATE users
243
  SET status = 'active', last_login = GETDATE()
 
251
  WHERE email LIKE '%@example.com' AND created_at >= '2025-01-01';""",
252
  ]
253
 
254
+ with gr.Blocks(theme=gr.themes.Glass()) as demo:
255
+ gr.Markdown("## 📝 CRUD-SQL2Text")
 
 
 
 
 
 
 
 
 
 
256
 
257
+ sql_in = gr.Textbox(label="Enter SQL Query", lines=8, placeholder="Paste your SQL statement here...")
258
+ final_out = gr.Textbox(label="Natural Language Output", lines=3)
259
 
260
+ btn = gr.Button("Explain SQL")
261
+ btn.click(explain, inputs=[sql_in], outputs=[final_out])
 
262
 
263
+ gr.Examples(examples=EXAMPLES, inputs=[sql_in], outputs=[final_out], fn=explain, cache_examples=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
+ demo.launch(share=True)