wifix199 committed on
Commit bad52c6 · verified · 1 Parent(s): 12fc0b1

Update app.py

Files changed (1)
  1. app.py +164 -49
app.py CHANGED
@@ -1,64 +1,179 @@
- import gradio as gr
- from huggingface_hub import InferenceClient
-
  """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
  """
- client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
  if __name__ == "__main__":
      demo.launch()
 
+ #!/usr/bin/env python3
  """
+ ai_csv_editor_hf.py ── AI-powered CSV editor using a Hugging Face model on CPU.
+
+ Features:
+ - Upload one or more CSV files (main + optional lookup tables)
+ - Type spreadsheet-style commands: CONCAT, VLOOKUP, XLOOKUP, SUMIF
+ - LLM (google/flan-t5-base) converts commands → JSON “edit plan”
+ - pandas applies each action in sequence
+ - Preview first 20 rows & download modified CSV
  """
+
+ import json
+ import io
+ import tempfile
+ import textwrap
+ import pathlib
+ from typing import List, Dict, Any
+
+ import pandas as pd
+ import gradio as gr
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+
+ # ──────────────────────────────────────────────────────────
+ # 1. LOAD A SMALL INSTRUCTION-FOLLOWING MODEL (CPU only)
+ # ──────────────────────────────────────────────────────────
+ MODEL_NAME = "google/flan-t5-base"
+ MAX_NEW_TOK = 256
+ TEMPERATURE = 0.0
+
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForSeq2SeqLM.from_pretrained(
+     MODEL_NAME,
+     device_map="cpu",  # force CPU
+     torch_dtype="auto"
+ )
+ generator = pipeline(
+     "text2text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     device=-1,  # -1 = CPU
+ )
+
+ # ──────────────────────────────────────────────────────────
+ # 2. PROMPT → JSON “EDIT PLAN”
+ # ──────────────────────────────────────────────────────────
+ SYSTEM_PROMPT = textwrap.dedent("""\
+     You are an assistant that converts natural-language spreadsheet commands
+     into JSON edit plans. Respond with ONLY valid JSON matching this schema:
+
+     {
+       "actions": [
+         {
+           "operation": "concat | vlookup | xlookup | sumif",
+           "target": "string",
+
+           # For CONCAT:
+           "columns": ["colA","colB"],
+           "separator": " ",
+
+           # For VLOOKUP / XLOOKUP:
+           "lookup_value": "KeyInMain",
+           "lookup_file": "other.csv",
+           "lookup_column": "KeyInOther",
+           "return_column": "Value",
+           "exact": true,
+
+           # For SUMIF:
+           "criteria_column": "Category",
+           "criteria": "Foo",
+           "sum_column": "Amount"
+         }
+       ]
+     }
+     """)
+
+ def plan_from_command(cmd: str) -> Dict[str, Any]:
+     prompt = f"{SYSTEM_PROMPT}\n\nUser: {cmd}\nJSON:"
+     output = generator(
+         prompt,
+         max_new_tokens=MAX_NEW_TOK,
+         temperature=TEMPERATURE,
+         do_sample=False,
+     )[0]["generated_text"]
+     try:
+         return json.loads(output)
+     except json.JSONDecodeError as e:
+         raise ValueError(f"Model returned invalid JSON:\n{output}") from e
+
+ # ──────────────────────────────────────────────────────────
+ # 3. DATA OPERATIONS
+ # ──────────────────────────────────────────────────────────
+ def apply_action(df: pd.DataFrame,
+                  uploads: Dict[str, pd.DataFrame],
+                  act: Dict[str, Any]) -> pd.DataFrame:
+     op = act["operation"]
+
+     if op == "concat":
+         sep = act.get("separator", "")
+         df[act["target"]] = (
+             df[act["columns"]]
+             .astype(str)
+             .agg(sep.join, axis=1)
+         )
+
+     elif op in {"vlookup", "xlookup"}:
+         lookup_df = uploads[act["lookup_file"]]
+         # select only the two relevant columns and rename for merging
+         right = lookup_df[[act["lookup_column"], act["return_column"]]] \
+             .rename(columns={
+                 act["lookup_column"]: act["lookup_value"],
+                 act["return_column"]: act["target"]
+             })
+         df = df.merge(right, on=act["lookup_value"], how="left")
+
+     elif op == "sumif":
+         mask = df[act["criteria_column"]] == act["criteria"]
+         total = df.loc[mask, act["sum_column"]].sum()
+         df[act["target"]] = total
+
+     else:
+         raise ValueError(f"Unsupported operation: {op}")
+
+     return df
+
+ # ──────────────────────────────────────────────────────────
+ # 4. GRADIO UI
+ # ──────────────────────────────────────────────────────────
+ def run_editor(files: List[gr.File], command: str):
+     if not files:
+         return None, "⚠️ Please upload at least one CSV file.", None
+
+     # Load uploaded CSVs into a dictionary
+     uploads = {
+         pathlib.Path(f.name).name: pd.read_csv(f.name)
+         for f in files
+     }
+     # Treat the first file as the main dataset
+     main_name = list(uploads.keys())[0]
+     df = uploads[main_name]
+
+     # Generate plan
+     try:
+         plan = plan_from_command(command)
+     except Exception as e:
+         return None, f"❌ LLM error: {e}", None
+
+     # Apply actions
+     try:
+         for act in plan["actions"]:
+             df = apply_action(df, uploads, act)
+     except Exception as e:
+         return None, f"❌ Execution error: {e}", None
+
+     # Write modified CSV to a temp file and return
+     tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
+     df.to_csv(tmp.name, index=False)
+     return df.head(20), "✅ Success! Download below.", tmp.name
+
+ with gr.Blocks(title="AI CSV Editor (HF, CPU)") as demo:
+     gr.Markdown("## AI-powered CSV Editor \n"
+                 "1. Upload one main CSV (first) plus any lookup tables \n"
+                 "2. Type a spreadsheet-style instruction \n"
+                 "3. Download the modified CSV")
+     csv_files = gr.Files(file_types=[".csv"], label="Upload CSV file(s)")
+     cmd_box = gr.Textbox(lines=2, placeholder="e.g. concat First Last → FullName")
+     run_btn = gr.Button("Apply")
+     preview = gr.Dataframe(label="Preview (first 20 rows)")
+     status = gr.Markdown()
+     download = gr.File(label="Download Result")
+
+     run_btn.click(
+         fn=run_editor,
+         inputs=[csv_files, cmd_box],
+         outputs=[preview, status, download]
+     )
+
  if __name__ == "__main__":
      demo.launch()
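
For reference, a minimal sketch of the edit-plan round trip the new app.py implements: a spreadsheet-style command, the JSON plan that the SYSTEM_PROMPT schema describes for it, and the same pandas CONCAT pattern used in apply_action. The DataFrame contents and column names (First, Last, FullName) are illustrative assumptions, not taken from the commit.

import pandas as pd

# Hypothetical main CSV (column names invented for illustration).
df = pd.DataFrame({"First": ["Ada", "Alan"], "Last": ["Lovelace", "Turing"]})

# Plan the model is expected to emit for the command
#   "concat First Last -> FullName"
# following the schema in SYSTEM_PROMPT.
plan = {
    "actions": [
        {
            "operation": "concat",
            "target": "FullName",
            "columns": ["First", "Last"],
            "separator": " ",
        }
    ]
}

# Apply the plan with the same pandas pattern as apply_action().
for act in plan["actions"]:
    sep = act.get("separator", "")
    df[act["target"]] = df[act["columns"]].astype(str).agg(sep.join, axis=1)

print(df)  # FullName column now holds "Ada Lovelace" and "Alan Turing"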